//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "PPC.h"
#include "PPCCCState.h"
#include "PPCCallingConv.h"
#include "PPCFrameLowering.h"
#include "PPCInstrInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <list>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "ppc-lowering"

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableSCO("disable-ppc-sco",
cl::desc("disable sibling call optimization on ppc"), cl::Hidden);

static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);

static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision",
cl::desc("enable quad precision float support on ppc"), cl::Hidden);

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");

static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);

static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8 : 4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  if (!useSoftFloat()) {
    if (hasSPE()) {
      addRegisterClass(MVT::f32, &PPC::SPE4RCRegClass);
      addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
    } else {
      addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
      addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
    }
  }

  // Match BITREVERSE to customized fast code sequence in the td file.
  setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
  setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);

  // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc load and store's.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  if (!Subtarget.hasSPE()) {
    setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
    setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
  }

  // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Legal);
    setOperationAction(ISD::ADDE, VT, Legal);
    setOperationAction(ISD::SUBC, VT, Legal);
    setOperationAction(ISD::SUBE, VT, Legal);
  }

  if (Subtarget.useCRBits()) {

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::SINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load/store of condition registers.
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions unless we are on P9.
  // On P9 we may use a hardware instruction to compute the remainder.
  // The instructions are not legalized directly because in the cases where the
  // result of both the remainder and the division is required it is more
  // efficient to compute the remainder from the result of the division rather
  // than use the remainder instruction.
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::SREM, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Custom);
    setOperationAction(ISD::SREM, MVT::i64, Custom);
    setOperationAction(ISD::UREM, MVT::i64, Custom);
  } else {
    setOperationAction(ISD::SREM, MVT::i32, Expand);
    setOperationAction(ISD::UREM, MVT::i32, Expand);
    setOperationAction(ISD::SREM, MVT::i64, Expand);
    setOperationAction(ISD::UREM, MVT::i64, Expand);
  }

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow.
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);
  if (Subtarget.hasSPE()) {
    setOperationAction(ISD::FMA, MVT::f64, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Expand);
  } else {
    setOperationAction(ISD::FMA, MVT::f64, Legal);
    setOperationAction(ISD::FMA, MVT::f32, Legal);
  }

  // If we're enabling GP optimizations, use hardware square root.
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  if (Subtarget.hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }

  // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
  // to speed up scalar BSWAP64.
  // CTPOP or CTTZ were introduced in P8/P9 respectively.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  if (Subtarget.hasP9Vector())
    setOperationAction(ISD::BSWAP, MVT::i64, Custom);
  else
    setOperationAction(ISD::BSWAP, MVT::i64, Expand);
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::CTTZ, MVT::i32, Legal);
    setOperationAction(ISD::CTTZ, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTTZ, MVT::i32, Expand);
    setOperationAction(ISD::CTTZ, MVT::i64, Expand);
  }

  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
    setOperationAction(ISD::CTPOP, MVT::i32, Legal);
    setOperationAction(ISD::CTPOP, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
  }

  // PowerPC does not have ROTR.
  setOperationAction(ISD::ROTR, MVT::i32, Expand);
  setOperationAction(ISD::ROTR, MVT::i64, Expand);

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select.
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Expand);
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit.
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND, which requires SetCC.
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  if (Subtarget.hasSPE()) {
    // SPE has built-in conversions.
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  } else {
    // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

    // PowerPC does not have [U|S]INT_TO_FP.
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
  }

  if (Subtarget.hasDirectMove() && isPPC64) {
    setOperationAction(ISD::BITCAST, MVT::f32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i64, Legal);
    setOperationAction(ISD::BITCAST, MVT::f64, Legal);
  } else {
    setOperationAction(ISD::BITCAST, MVT::f32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
    setOperationAction(ISD::BITCAST, MVT::f64, Expand);
  }

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuation, user-level threading, etc. As a result, no
  // other SjLj exception interfaces are implemented and please don't build
  // your own exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  if (Subtarget.isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  if (Subtarget.isSVR4ABI() && !isPPC64)
    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  // Comparisons that require checking two conditions.
  if (Subtarget.hasSPE()) {
    setCondCodeAction(ISD::SETO, MVT::f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
  }
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    if (Subtarget.hasSPE())
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
    else
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (Subtarget.hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);

      // For v2i64, these are only valid with P8Vector. This is corrected after
      // the loop.
      setOperationAction(ISD::SMAX, VT, Legal);
      setOperationAction(ISD::SMIN, VT, Legal);
      setOperationAction(ISD::UMAX, VT, Legal);
      setOperationAction(ISD::UMIN, VT, Legal);

      if (Subtarget.hasVSX()) {
        setOperationAction(ISD::FMAXNUM, VT, Legal);
        setOperationAction(ISD::FMINNUM, VT, Legal);
      }

      // Vector instructions introduced in P8.
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
        setOperationAction(ISD::CTPOP, VT, Legal);
        setOperationAction(ISD::CTLZ, VT, Legal);
      }
      else {
        setOperationAction(ISD::CTPOP, VT, Expand);
        setOperationAction(ISD::CTLZ, VT, Expand);
      }

      // Vector instructions introduced in P9.
      if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
        setOperationAction(ISD::CTTZ, VT, Legal);
      else
        setOperationAction(ISD::CTTZ, VT, Expand);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType(ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND, VT, Promote);
      AddPromotedToType(ISD::AND, VT, MVT::v4i32);
      setOperationAction(ISD::OR, VT, Promote);
      AddPromotedToType(ISD::OR, VT, MVT::v4i32);
      setOperationAction(ISD::XOR, VT, Promote);
      AddPromotedToType(ISD::XOR, VT, MVT::v4i32);
      setOperationAction(ISD::LOAD, VT, Promote);
      AddPromotedToType(ISD::LOAD, VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType(ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::SELECT_CC, VT, Promote);
      AddPromotedToType(ISD::SELECT_CC, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType(ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.

      for (MVT InnerVT : MVT::vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }
    if (!Subtarget.hasP8Vector()) {
      setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
      setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
    }

    for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
      setOperationAction(ISD::ABS, VT, Custom);

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    // Vector truncates to sub-word integer that fit in an Altivec/VSX register
    // are cheap, so handle them before they get expanded to scalar.
    setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);

    setOperationAction(ISD::AND, MVT::v4i32, Legal);
    setOperationAction(ISD::OR, MVT::v4i32, Legal);
    setOperationAction(ISD::XOR, MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    // Without hasP8Altivec set, v2i64 SMAX isn't available.
    // But ABS custom lowering requires SMAX support.
    if (!Subtarget.hasP8Altivec())
      setOperationAction(ISD::ABS, MVT::v2i64, Expand);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::MUL, MVT::v4i32, Legal);
    else
      setOperationAction(ISD::MUL, MVT::v4i32, Custom);

    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions.
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
      if (Subtarget.hasP8Vector()) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
      }
      if (Subtarget.hasDirectMove() && isPPC64) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
      }

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);

      if (Subtarget.hasP8Vector())
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::SHL, MVT::v2i64, Legal);
        setOperationAction(ISD::SRA, MVT::v2i64, Legal);
        setOperationAction(ISD::SRL, MVT::v2i64, Legal);

        // 128 bit shifts can be accomplished via 3 instructions for SHL and
        // SRL, but not for SRA because of the instructions available:
        // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
        // doing.
        setOperationAction(ISD::SHL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRA, MVT::v1i128, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
      }
      else {
        setOperationAction(ISD::SHL, MVT::v2i64, Expand);
        setOperationAction(ISD::SRA, MVT::v2i64, Expand);
        setOperationAction(ISD::SRL, MVT::v2i64, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

        // VSX v2i64 only supports non-arithmetic operations.
        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
      }

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType(ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
      AddPromotedToType(ISD::STORE, MVT::v2i64, MVT::v2f64);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);

      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);

      // Custom handling for partial vectors of integers converted to
      // floating point. We already have optimal handling for v2i32 through
      // the DAG combine, so those aren't necessary.
      setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);

      setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
      setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
      setOperationAction(ISD::FABS, MVT::v4f32, Legal);
      setOperationAction(ISD::FABS, MVT::v2f64, Legal);
      setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
      setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);

      if (Subtarget.hasDirectMove())
        setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
      setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }

    if (Subtarget.hasP8Altivec()) {
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
    }

    if (Subtarget.hasP9Vector()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

      // 128 bit shifts can be accomplished via 3 instructions for SHL and
      // SRL, but not for SRA because of the instructions available:
      // VS{RL} and VS{RL}O.
      setOperationAction(ISD::SHL, MVT::v1i128, Legal);
      setOperationAction(ISD::SRL, MVT::v1i128, Legal);
      setOperationAction(ISD::SRA, MVT::v1i128, Expand);

      if (EnableQuadPrecision) {
        addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
        setOperationAction(ISD::FADD, MVT::f128, Legal);
        setOperationAction(ISD::FSUB, MVT::f128, Legal);
        setOperationAction(ISD::FDIV, MVT::f128, Legal);
        setOperationAction(ISD::FMUL, MVT::f128, Legal);
        setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
        // No extending loads to f128 on PPC.
        for (MVT FPT : MVT::fp_valuetypes())
          setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
        setOperationAction(ISD::FMA, MVT::f128, Legal);
        setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
        setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
        setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
        setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
        setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
        setCondCodeAction(ISD::SETONE, MVT::f128, Expand);

        setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
        setOperationAction(ISD::FRINT, MVT::f128, Legal);
        setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
        setOperationAction(ISD::FCEIL, MVT::f128, Legal);
        setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
        setOperationAction(ISD::FROUND, MVT::f128, Legal);

        setOperationAction(ISD::SELECT, MVT::f128, Expand);
        setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
        setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
        setTruncStoreAction(MVT::f128, MVT::f64, Expand);
        setTruncStoreAction(MVT::f128, MVT::f32, Expand);
        setOperationAction(ISD::BITCAST, MVT::i128, Custom);
        // No implementation for these ops for PowerPC.
        setOperationAction(ISD::FSIN, MVT::f128, Expand);
        setOperationAction(ISD::FCOS, MVT::f128, Expand);
        setOperationAction(ISD::FPOW, MVT::f128, Expand);
        setOperationAction(ISD::FPOWI, MVT::f128, Expand);
        setOperationAction(ISD::FREM, MVT::f128, Expand);
      }
      setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);

    }

    if (Subtarget.hasP9Altivec()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
    }
  }

  if (Subtarget.hasQPX()) {
    setOperationAction(ISD::FADD, MVT::v4f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
    setOperationAction(ISD::FREM, MVT::v4f64, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);

    setOperationAction(ISD::LOAD, MVT::v4f64, Custom);
    setOperationAction(ISD::STORE, MVT::v4f64, Custom);

    setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
    setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);

    setOperationAction(ISD::FP_TO_SINT, MVT::v4f64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4f64, Expand);

    addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FREM, MVT::v4f32, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);

    setOperationAction(ISD::LOAD, MVT::v4f32, Custom);
    setOperationAction(ISD::STORE, MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);

    addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);

    setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);

    // These need to set FE_INEXACT, and so cannot be vectorized here.
    setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);

    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    } else {
      setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);

      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    }
  }

  if (Subtarget.has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);

  if (!isPPC64) {
    setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  }

  if (!isPPC64) {
    // These libcalls are not available in 32-bit.
    setLibcallName(RTLIB::SHL_I128, nullptr);
    setLibcallName(RTLIB::SRL_I128, nullptr);
    setLibcallName(RTLIB::SRA_I128, nullptr);
  }

  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SHL);
  setTargetDAGCombine(ISD::SRA);
  setTargetDAGCombine(ISD::SRL);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::BUILD_VECTOR);
  if (Subtarget.hasFPCVT())
    setTargetDAGCombine(ISD::UINT_TO_FP);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  if (Subtarget.useCRBits())
    setTargetDAGCombine(ISD::BRCOND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_VOID);

  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  setTargetDAGCombine(ISD::TRUNCATE);

  if (Subtarget.useCRBits()) {
    setTargetDAGCombine(ISD::TRUNCATE);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  // Use reciprocal estimates.
  if (TM.Options.UnsafeFPMath) {
    setTargetDAGCombine(ISD::FDIV);
    setTargetDAGCombine(ISD::FSQRT);
  }

  if (Subtarget.hasP9Altivec()) {
    setTargetDAGCombine(ISD::ABS);
    setTargetDAGCombine(ISD::VSELECT);
  }

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget.isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  if (EnableQuadPrecision) {
    setLibcallName(RTLIB::LOG_F128, "logf128");
    setLibcallName(RTLIB::LOG2_F128, "log2f128");
    setLibcallName(RTLIB::LOG10_F128, "log10f128");
    setLibcallName(RTLIB::EXP_F128, "expf128");
    setLibcallName(RTLIB::EXP2_F128, "exp2f128");
    setLibcallName(RTLIB::SIN_F128, "sinf128");
    setLibcallName(RTLIB::COS_F128, "cosf128");
    setLibcallName(RTLIB::POW_F128, "powf128");
    setLibcallName(RTLIB::FMIN_F128, "fminf128");
    setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
    setLibcallName(RTLIB::POWI_F128, "__powikf2");
    setLibcallName(RTLIB::REM_F128, "fmodf128");
  }

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
    setHasMultipleConditionRegisters();
    setJumpIsExpensive();
  }

  setMinFunctionAlignment(2);
  if (Subtarget.isDarwin())
    setPrefFunctionAlignment(4);

  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9:
    setPrefFunctionAlignment(4);
    setPrefLoopAlignment(4);
    break;
  }

  if (Subtarget.enableMachineScheduler())
    setSchedulingPreference(Sched::Source);
  else
    setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties(STI.getRegisterInfo());

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;
  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of a function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
    MaxLoadsPerMemcmp = 128;
  } else {
    MaxLoadsPerMemcmp = 8;
    MaxLoadsPerMemcmpOptSize = 4;
  }
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      unsigned EltAlign = 0;
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  // Darwin passes everything on 4 byte boundary.
  if (Subtarget.isDarwin())
    return 4;

  // 16-byte and wider vectors are passed on a 16-byte boundary.
  // The rest is 8 on PPC64 and 4 on PPC32 boundary.
  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
  return Align;
}
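
// Illustrative example (editorial addition, not part of the upstream file):
// on an Altivec (non-QPX) subtarget, a by-value struct containing a 128-bit
// vector member reaches getMaxByValAlign with MaxMaxAlign == 16, so the
// alignment returned here is raised from the default 8 (PPC64) or 4 (PPC32)
// to 16 bytes.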

bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

bool PPCTargetLowering::hasSPE() const {
  return Subtarget.hasSPE();
}

bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
  return VT.isScalarInteger();
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((PPCISD::NodeType)Opcode) {
  case PPCISD::FIRST_NUMBER: break;
  case PPCISD::FSEL: return "PPCISD::FSEL";
  case PPCISD::FCFID: return "PPCISD::FCFID";
  case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
  case PPCISD::FP_TO_UINT_IN_VSR:
    return "PPCISD::FP_TO_UINT_IN_VSR";
  case PPCISD::FP_TO_SINT_IN_VSR:
    return "PPCISD::FP_TO_SINT_IN_VSR";
  case PPCISD::FRE: return "PPCISD::FRE";
  case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX: return "PPCISD::STFIWX";
  case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM: return "PPCISD::VPERM";
  case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
  case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
  case PPCISD::XXREVERSE: return "PPCISD::XXREVERSE";
  case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
  case PPCISD::VECSHL: return "PPCISD::VECSHL";
  case PPCISD::CMPB: return "PPCISD::CMPB";
  case PPCISD::Hi: return "PPCISD::Hi";
  case PPCISD::Lo: return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
  case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
  case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
  case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
  case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL: return "PPCISD::SRL";
  case PPCISD::SRA: return "PPCISD::SRA";
  case PPCISD::SHL: return "PPCISD::SHL";
  case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
  case PPCISD::CALL: return "PPCISD::CALL";
  case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR: return "PPCISD::MTCTR";
  case PPCISD::BCTRL: return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
  case PPCISD::MFVSR: return "PPCISD::MFVSR";
  case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
  case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
  case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
  case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
  case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT";
  case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT";
  case PPCISD::VCMP: return "PPCISD::VCMP";
  case PPCISD::VCMPo: return "PPCISD::VCMPo";
  case PPCISD::LBRX: return "PPCISD::LBRX";
  case PPCISD::STBRX: return "PPCISD::STBRX";
  case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
  case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
  case PPCISD::STXSIX: return "PPCISD::STXSIX";
  case PPCISD::VEXTS: return "PPCISD::VEXTS";
  case PPCISD::SExtVElems: return "PPCISD::SExtVElems";
  case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
  case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
  case PPCISD::ST_VSR_SCAL_INT:
    return "PPCISD::ST_VSR_SCAL_INT";
  case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ: return "PPCISD::BDNZ";
  case PPCISD::BDZ: return "PPCISD::BDZ";
  case PPCISD::MFFS: return "PPCISD::MFFS";
  case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET: return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
  case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
  case PPCISD::SC: return "PPCISD::SC";
  case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
  case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
  case PPCISD::RFEBB: return "PPCISD::RFEBB";
  case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
  case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
  case PPCISD::VABSD: return "PPCISD::VABSD";
  case PPCISD::QVFPERM: return "PPCISD::QVFPERM";
  case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
  case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
  case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI";
  case PPCISD::QBFLT: return "PPCISD::QBFLT";
  case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
  case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
  case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
  case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
  case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
  case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
  case PPCISD::FP_EXTEND_LH: return "PPCISD::FP_EXTEND_LH";
  }
  return nullptr;
}

EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
                                          EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  if (Subtarget.hasQPX())
    return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());

  return VT.changeVectorElementTypeToInteger();
}

bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),   i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
        return false;
  }
  return true;
}
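
// Illustrative example (editorial addition, not part of the upstream file):
// vpkuhum packs the low-order byte of every halfword across both inputs. On a
// big-endian target with two distinct inputs (ShuffleKind == 0), the
// qualifying v16i8 mask is
//   <1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31>
// i.e. element i must be i*2+1 (or undef) for all sixteen positions.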

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
        return false;
  }
  return true;
}

/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  const PPCSubtarget& Subtarget =
      static_cast<const PPCSubtarget&>(DAG.getSubtarget());
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}
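
// Illustrative example (editorial addition, not part of the upstream file):
// for UnitSize == 1 on a big-endian target with two distinct inputs
// (ShuffleKind == 0) this calls isVMerge(N, 1, 8, 24), which accepts the
// byte merge-low mask
//   <8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31>.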

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}

/**
 * Common function used to match vmrgew and vmrgow shuffles
 *
 * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target
 * machine.
 *   - Little Endian:
 *     - Use offset of 0 to check for odd elements
 *     - Use offset of 4 to check for even elements
 *   - Big Endian:
 *     - Use offset of 0 to check for even elements
 *     - Use offset of 4 to check for odd elements
 * A detailed description of the vector element ordering for little endian and
 * big endian can be found at
 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
 * Targeting your applications - what little endian and big endian IBM XL C/C++
 * compiler differences mean to you
 *
 * The mask to the shuffle vector instruction specifies the indices of the
 * elements from the two input vectors to place in the result. The elements are
 * numbered in array-access order, starting with the first vector. These vectors
 * are always of type v16i8, thus each vector will contain 16 elements of size
 * 8 bits. More info on the shuffle vector can be found in the
 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
 * Language Reference.
 *
 * The RHSStartValue indicates whether the same input vectors are used (unary)
 * or two different input vectors are used, based on the following:
 *   - If the instruction uses the same vector for both inputs, the range of the
 *     indices will be 0 to 15. In this case, the RHSStart value passed should
 *     be 0.
 *   - If the instruction has two different vectors then the range of the
 *     indices will be 0 to 31. In this case, the RHSStart value passed should
 *     be 16 (indices 0-15 specify elements in the first vector while indices 16
 *     to 31 specify elements in the second vector).
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] IndexOffset Specifies whether to look for even or odd elements
 * \param[in] RHSStartValue Specifies the starting index for the righthand input
 * vector to the shuffle_vector instruction
 * \return true iff this shuffle vector represents an even or odd word merge
 */
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
                     unsigned RHSStartValue) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;

  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
                             i*RHSStartValue+j+IndexOffset) ||
          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
                             i*RHSStartValue+j+IndexOffset+8))
        return false;
  return true;
}

/**
 * Determine if the specified shuffle mask is suitable for the vmrgew or
 * vmrgow instructions.
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
 * \param[in] ShuffleKind Identify the type of merge:
 *   - 0 = big-endian merge with two different inputs;
 *   - 1 = either-endian merge with two identical inputs;
 *   - 2 = little-endian merge with two different inputs (inputs are swapped for
 *     little-endian merges).
 * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask represents an even or odd word merge
 */
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                              unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    unsigned indexOffset = CheckEven ? 4 : 0;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
  else {
    unsigned indexOffset = CheckEven ? 0 : 4;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 0) // Normal
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
  return false;
}
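
// Illustrative example (editorial addition, not part of the upstream file):
// a big-endian vmrgew of two distinct inputs (CheckEven == true,
// ShuffleKind == 0) interleaves the even words of the inputs A and B, giving
// result words <A0, B0, A2, B2>, which as a v16i8 mask is
//   <0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27>.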

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2). For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;

  ShiftAmt -= i;
  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}
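
// Illustrative example (editorial addition, not part of the upstream file):
// on a big-endian target with two distinct inputs (ShuffleKind == 0), the mask
//   <3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18>
// is consecutive starting at 3, so the returned vsldoi shift amount is 3. On
// little-endian (ShuffleKind == 2) the same mask yields 16 - 3 = 13.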

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // The consecutive indices need to specify an element, not part of two
  // different elements. So abandon ship early if this isn't the case.
  if (N->getMaskElt(0) % EltSize != 0)
    return false;

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}
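
// Illustrative example (editorial addition, not part of the upstream file):
// with EltSize == 4, the v16i8 mask
//   <4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7>
// splats word element 1 of the first input and passes this check;
// getVSPLTImmediate() below maps it to the VSPLTW immediate (1 on big-endian,
// (16/4) - 1 - 1 = 2 on little-endian).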

/// Check that the mask is shuffling N byte elements. Within each N byte
/// element of the mask, the indices could be either in increasing or
/// decreasing order as long as they are consecutive.
/// \param[in] N the shuffle vector SD Node to analyze
/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
/// Word/DoubleWord/QuadWord).
/// \param[in] StepLen the index delta between adjacent bytes within each
/// element: 1 if the mask is in increasing order, -1 if it is in decreasing
/// order.
/// \return true iff the mask is shuffling N byte elements.
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
                                   int StepLen) {
  assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
         "Unexpected element width.");
  assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");

  unsigned NumOfElem = 16 / Width;
  unsigned MaskVal[16]; // Width is never greater than 16
  for (unsigned i = 0; i < NumOfElem; ++i) {
    MaskVal[0] = N->getMaskElt(i * Width);
    if ((StepLen == 1) && (MaskVal[0] % Width)) {
      return false;
    } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
      return false;
    }

    for (unsigned int j = 1; j < Width; ++j) {
      MaskVal[j] = N->getMaskElt(i * Width + j);
      if (MaskVal[j] != MaskVal[j-1] + StepLen) {
        return false;
      }
    }
  }

  return true;
}

bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12.
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // H, 1, 2, 3 or L, 5, 6, 7
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;
    Swap = M0 < 4;
    return true;
  }
  // 0, H, 2, 3 or 4, L, 6, 7
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;
    Swap = M1 < 4;
    return true;
  }
  // 0, 1, H, 3 or 4, 5, L, 7
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;
    Swap = M2 < 4;
    return true;
  }
  // 0, 1, 2, H or 4, 5, 6, L
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;
    Swap = M3 < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
      return true;
    }
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
      return true;
    }
  }

  return false;
}
1915
1917 bool &Swap, bool IsLE) {
1918 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
1919 // Ensure each byte index of the word is consecutive.
1920 if (!isNByteElemShuffleMask(N, 4, 1))
1921 return false;
1922
1923 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
1924 unsigned M0 = N->getMaskElt(0) / 4;
1925 unsigned M1 = N->getMaskElt(4) / 4;
1926 unsigned M2 = N->getMaskElt(8) / 4;
1927 unsigned M3 = N->getMaskElt(12) / 4;
1928
1929 // If both vector operands for the shuffle are the same vector, the mask will
1930 // contain only elements from the first one and the second one will be undef.
1931 if (N->getOperand(1).isUndef()) {
1932 assert(M0 < 4 && "Indexing into an undef vector?");
1933 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
1934 return false;
1935
1936 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
1937 Swap = false;
1938 return true;
1939 }
1940
1941 // Ensure each word index of the ShuffleVector Mask is consecutive.
1942 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
1943 return false;
1944
1945 if (IsLE) {
1946 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
1947 // Input vectors don't need to be swapped if the leading element
1948 // of the result is one of the 3 left elements of the second vector
1949 // (or if there is no shift to be done at all).
1950 Swap = false;
1951 ShiftElts = (8 - M0) % 8;
1952 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
1953 // Input vectors need to be swapped if the leading element
1954 // of the result is one of the 3 left elements of the first vector
1955 // (or if we're shifting by 4 - thereby simply swapping the vectors).
1956 Swap = true;
1957 ShiftElts = (4 - M0) % 4;
1958 }
1959
1960 return true;
1961 } else { // BE
1962 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
1963 // Input vectors don't need to be swapped if the leading element
1964 // of the result is one of the 4 elements of the first vector.
1965 Swap = false;
1966 ShiftElts = M0;
1967 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
1968 // Input vectors need to be swapped if the leading element
1969 // of the result is one of the 4 elements of the right vector.
1970 Swap = true;
1971 ShiftElts = M0 - 4;
1972 }
1973
1974 return true;
1975 }
1976}

static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  if (!isNByteElemShuffleMask(N, Width, -1))
    return false;

  for (int i = 0; i < 16; i += Width)
    if (N->getMaskElt(i) != i + Width - 1)
      return false;

  return true;
}

bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 2);
}

bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 4);
}

bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 8);
}

bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 16);
}

/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
/// if the inputs to the instruction should be swapped and set \p DM to the
/// value for the immediate.
/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
/// AND element 0 of the result comes from the first input (LE) or second input
/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
/// mask.
bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
                                bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  // Ensure each byte index of the double word is consecutive.
  if (!isNByteElemShuffleMask(N, 8, 1))
    return false;

  unsigned M0 = N->getMaskElt(0) / 8;
  unsigned M1 = N->getMaskElt(8) / 8;
  assert(((M0 | M1) < 4) && "A mask element out of bounds?");

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    if ((M0 | M1) < 2) {
      DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
      Swap = false;
      return true;
    } else
      return false;
  }

  if (IsLE) {
    if (M0 > 1 && M1 < 2) {
      Swap = false;
    } else if (M0 < 2 && M1 > 1) {
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (((~M1) & 1) << 1) + ((~M0) & 1);
    return true;
  } else { // BE
    if (M0 < 2 && M1 > 1) {
      Swap = false;
    } else if (M0 > 1 && M1 < 2) {
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (M0 << 1) + (M1 & 1);
    return true;
  }
}
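
// Illustrative example (editorial addition, not part of the upstream file):
// on big-endian, the byte mask <0..7, 24..31> gives M0 = 0 and M1 = 3, so
// Swap stays false and DM = (0 << 1) + (3 & 1) = 1: doubleword 0 of the
// result comes from the first input and doubleword 1 from the second.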

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
                                SelectionDAG &DAG) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  assert(isSplatShuffleMask(SVOp, EltSize));
  if (DAG.getDataLayout().isLittleEndian())
    return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
  else
    return SVOp->getMaskElt(0) / EltSize;
}

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted. The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(nullptr, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).isUndef()) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      if (!UniquedVals[i&(Multiple-1)].getNode())
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue(); // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk. See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1. If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (!UniquedVals[i].getNode()) continue; // Must have been undefs.

      LeadingZero &= isNullConstant(UniquedVals[i]);
      LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (Val < 16) // 0,0,0,4 -> vspltisw(4)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }
    if (LeadingOnes) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).isUndef()) continue;
    if (!OpVal.getNode())
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat. The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, check if it consists
  // of a repeated bit pattern of size ByteSize.
  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
    return SDValue();

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it.
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
  return SDValue();
}
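
// Illustrative example (editorial addition, not part of the upstream file):
// for a v4i32 build_vector of four copies of the constant 5 with
// ByteSize == 4, the value fits the 5-bit signed immediate range (-16..15),
// so this returns a target constant 5, which selects to "vspltisw 5".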

/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isQVALIGNIShuffleMask(SDNode *N) {
  EVT VT = N->getValueType(0);
  if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 4) return -1; // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  // Check the rest of the elements to see if they are consecutive.
  for (++i; i != 4; ++i)
    if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
      return -1;

  return ShiftAmt;
}
2213
2214//===----------------------------------------------------------------------===//
2215// Addressing Mode Selection
2216//===----------------------------------------------------------------------===//
2217
2218/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2219/// or 64-bit immediate, and if the value can be accurately represented as a
2220/// sign extension from a 16-bit value. If so, this returns true and the
2221/// immediate.
2222bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2223 if (!isa<ConstantSDNode>(N))
2224 return false;
2225
2226 Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2227 if (N->getValueType(0) == MVT::i32)
2228 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2229 else
2230 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2231}
2232bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2233 return isIntS16Immediate(Op.getNode(), Imm);
2234}
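// Worked example (illustration): an i32 constant of 0xFFFF8000 truncates to
// Imm == -32768 and (int32_t)0xFFFF8000 == -32768, so the round trip holds
// and this returns true; an i32 constant of 0x8000 (32768) also truncates to
// Imm == -32768, fails the comparison, and returns false.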
2235
2236
2237/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2238/// be represented as an indexed [r+r] operation.
2239bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2240 SDValue &Index,
2241 SelectionDAG &DAG) const {
2242 for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
2243 UI != E; ++UI) {
2244 if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
2245 if (Memop->getMemoryVT() == MVT::f64) {
2246 Base = N.getOperand(0);
2247 Index = N.getOperand(1);
2248 return true;
2249 }
2250 }
2251 }
2252 return false;
2253}
2254
2255/// SelectAddressRegReg - Given the specified address, check to see if it
2256/// can be represented as an indexed [r+r] operation. Returns false if it
2257/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2258/// non-zero and N can be represented by a base register plus a signed 16-bit
2259/// displacement, make a more precise judgement by checking (displacement % \p
2260/// EncodingAlignment).
2262 SDValue &Index, SelectionDAG &DAG,
2263 unsigned EncodingAlignment) const {
2264 int16_t imm = 0;
2265 if (N.getOpcode() == ISD::ADD) {
2266 // Is this an SPE f64 load/store, which can't handle a 16-bit offset?
2267 // SPE load/store instructions can only handle 8-bit offsets.
2268 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2269 return true;
2270 if (isIntS16Immediate(N.getOperand(1), imm) &&
2271 (!EncodingAlignment || !(imm % EncodingAlignment)))
2272 return false; // r+i
2273 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2274 return false; // r+i
2275
2276 Base = N.getOperand(0);
2277 Index = N.getOperand(1);
2278 return true;
2279 } else if (N.getOpcode() == ISD::OR) {
2280 if (isIntS16Immediate(N.getOperand(1), imm) &&
2281 (!EncodingAlignment || !(imm % EncodingAlignment)))
2282 return false; // r+i: fold the immediate if we can.
2283
2284 // If this is an or of disjoint bitfields, we can codegen this as an add
2285 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2286 // disjoint.
2287 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2288
2289 if (LHSKnown.Zero.getBoolValue()) {
2290 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2291 // If all of the bits are known zero on the LHS or RHS, the add won't
2292 // carry.
2293 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2294 Base = N.getOperand(0);
2295 Index = N.getOperand(1);
2296 return true;
2297 }
2298 }
2299 }
2300
2301 return false;
2302}
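// Worked example of the OR-as-ADD path (illustration): for
// N = (or (shl %x, 20), 0x12345), the constant does not fit a signed 16-bit
// field, but the low 20 bits of the LHS are known zero and every bit outside
// 0x12345 is known zero on the RHS, so each bit is known zero on at least
// one side, the OR cannot carry, and Base = (shl %x, 20) with
// Index = 0x12345 is a valid [r+r] selection.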
2303
2304// If we happen to be doing an i64 load or store into a stack slot that has
2305// less than a 4-byte alignment, then the frame-index elimination may need to
2306// use an indexed load or store instruction (because the offset may not be a
2307// multiple of 4). The extra register needed to hold the offset comes from the
2308// register scavenger, and it is possible that the scavenger will need to use
2309// an emergency spill slot. As a result, we need to make sure that a spill slot
2310// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2311// stack slot.
2312static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2313 // FIXME: This does not handle the LWA case.
2314 if (VT != MVT::i64)
2315 return;
2316
2317 // NOTE: We'll exclude negative FIs here, which come from argument
2318 // lowering, because there are no known test cases triggering this problem
2319 // using packed structures (or similar). We can remove this exclusion if
2320 // we find such a test case. The reason why this is so test-case driven is
2321 // because this entire 'fixup' is only to prevent crashes (from the
2322 // register scavenger) on not-really-valid inputs. For example, if we have:
2323 // %a = alloca i1
2324 // %b = bitcast i1* %a to i64*
2325 // store i64 %c, i64* %b
2326 // then the store should really be marked as 'align 1', but is not. If it
2327 // were marked as 'align 1' then the indexed form would have been
2328 // instruction-selected initially, and the problem this 'fixup' is preventing
2329 // won't happen regardless.
2330 if (FrameIdx < 0)
2331 return;
2332
2333 MachineFunction &MF = DAG.getMachineFunction();
2334 MachineFrameInfo &MFI = MF.getFrameInfo();
2335
2336 unsigned Align = MFI.getObjectAlignment(FrameIdx);
2337 if (Align >= 4)
2338 return;
2339
2340 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2341 FuncInfo->setHasNonRISpills();
2342}
2343
2344/// Returns true if the address N can be represented by a base register plus
2345/// a signed 16-bit displacement [r+imm], and if it is not better
2346/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2347/// displacements that are multiples of that value.
2348bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
2349 SDValue &Base,
2350 SelectionDAG &DAG,
2351 unsigned EncodingAlignment) const {
2352 // FIXME dl should come from parent load or store, not from address
2353 SDLoc dl(N);
2354 // If this can be more profitably realized as r+r, fail.
2355 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2356 return false;
2357
2358 if (N.getOpcode() == ISD::ADD) {
2359 int16_t imm = 0;
2360 if (isIntS16Immediate(N.getOperand(1), imm) &&
2361 (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
2362 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2363 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2364 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2365 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2366 } else {
2367 Base = N.getOperand(0);
2368 }
2369 return true; // [r+i]
2370 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2371 // Match LOAD (ADD (X, Lo(G))).
2372 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2373 && "Cannot handle constant offsets yet!");
2374 Disp = N.getOperand(1).getOperand(0); // The global address.
2375 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2376 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2377 Disp.getOpcode() == ISD::TargetConstantPool ||
2378 Disp.getOpcode() == ISD::TargetJumpTable);
2379 Base = N.getOperand(0);
2380 return true; // [&g+r]
2381 }
2382 } else if (N.getOpcode() == ISD::OR) {
2383 int16_t imm = 0;
2384 if (isIntS16Immediate(N.getOperand(1), imm) &&
2385 (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
2386 // If this is an or of disjoint bitfields, we can codegen this as an add
2387 // (for better address arithmetic) if the LHS and RHS of the OR are
2388 // provably disjoint.
2389 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2390
2391 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2392 // If all of the bits are known zero on the LHS or RHS, the add won't
2393 // carry.
2394 if (FrameIndexSDNode *FI =
2395 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2396 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2397 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2398 } else {
2399 Base = N.getOperand(0);
2400 }
2401 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2402 return true;
2403 }
2404 }
2405 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2406 // Loading from a constant address.
2407
2408 // If this address fits entirely in a 16-bit sext immediate field, codegen
2409 // this as "d, 0"
2410 int16_t Imm;
2411 if (isIntS16Immediate(CN, Imm) &&
2412 (!EncodingAlignment || (Imm % EncodingAlignment) == 0)) {
2413 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2414 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2415 CN->getValueType(0));
2416 return true;
2417 }
2418
2419 // Handle 32-bit sext immediates with LIS + addr mode.
2420 if ((CN->getValueType(0) == MVT::i32 ||
2421 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2422 (!EncodingAlignment || (CN->getZExtValue() % EncodingAlignment) == 0)) {
2423 int Addr = (int)CN->getZExtValue();
2424
2425 // Otherwise, break this down into an LIS + disp.
2426 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2427
2428 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2429 MVT::i32);
2430 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2431 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2432 return true;
2433 }
2434 }
2435
2436 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2437 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2438 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2439 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2440 } else
2441 Base = N;
2442 return true; // [r+0]
2443}
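// Worked example of the LIS + disp split (illustration): for the constant
// address 0x12348000, (short)Addr == -32768, so Disp becomes -32768 and
// Base becomes (0x12348000 - (-32768)) >> 16 == 0x1235; LIS materializes
// 0x12350000, and 0x12350000 + (-32768) == 0x12348000 recovers the address.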
2444
2445/// SelectAddressRegRegOnly - Given the specified address, force it to be
2446/// represented as an indexed [r+r] operation.
2447bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2448 SDValue &Index,
2449 SelectionDAG &DAG) const {
2450 // Check to see if we can easily represent this as an [r+r] address. This
2451 // will fail if it thinks that the address is more profitably represented as
2452 // reg+imm, e.g. where imm = 0.
2453 if (SelectAddressRegReg(N, Base, Index, DAG))
2454 return true;
2455
2456 // If the address is the result of an add, we will utilize the fact that the
2457 // address calculation includes an implicit add. However, we can reduce
2458 // register pressure if we do not materialize a constant just for use as the
2459 // index register. We only get rid of the add if it is not an add of a
2460 // value and a 16-bit signed constant where both have a single use.
2461 int16_t imm = 0;
2462 if (N.getOpcode() == ISD::ADD &&
2463 (!isIntS16Immediate(N.getOperand(1), imm) ||
2464 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2465 Base = N.getOperand(0);
2466 Index = N.getOperand(1);
2467 return true;
2468 }
2469
2470 // Otherwise, do it the hard way, using R0 as the base register.
2471 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2472 N.getValueType());
2473 Index = N;
2474 return true;
2475}
2476
2477/// Returns true if we should use a direct load into vector instruction
2478/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2479static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2480
2481 // If there are any uses other than scalar to vector, then we should
2482 // keep it as a scalar load -> direct move pattern to prevent multiple
2483 // loads.
2484 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2485 if (!LD)
2486 return false;
2487
2488 EVT MemVT = LD->getMemoryVT();
2489 if (!MemVT.isSimple())
2490 return false;
2491 switch(MemVT.getSimpleVT().SimpleTy) {
2492 case MVT::i64:
2493 break;
2494 case MVT::i32:
2495 if (!ST.hasP8Vector())
2496 return false;
2497 break;
2498 case MVT::i16:
2499 case MVT::i8:
2500 if (!ST.hasP9Vector())
2501 return false;
2502 break;
2503 default:
2504 return false;
2505 }
2506
2507 SDValue LoadedVal(N, 0);
2508 if (!LoadedVal.hasOneUse())
2509 return false;
2510
2511 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2512 UI != UE; ++UI)
2513 if (UI.getUse().get().getResNo() == 0 &&
2514 UI->getOpcode() != ISD::SCALAR_TO_VECTOR)
2515 return false;
2516
2517 return true;
2518}
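// Illustrative DAG shape (a sketch, not from the original source): on a
// Power8 subtarget this returns true for
//   t1: i32,ch = load<(load 4)> ...
//   t2: v4i32 = scalar_to_vector t1
// where keeping the load paired with the scalar_to_vector lets instruction
// selection form a direct load into a vector register (e.g. lxsiwzx)
// instead of a GPR load followed by a direct move such as mtvsrwz.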
2519
2520/// getPreIndexedAddressParts - returns true (by value) and sets the base
2521/// pointer, offset pointer, and addressing mode (by reference) if the node's
2522/// address can be legally represented as a pre-indexed load / store address.
2523bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2524 SDValue &Offset,
2525 ISD::MemIndexedMode &AM,
2526 SelectionDAG &DAG) const {
2527 if (DisablePPCPreinc) return false;
2528
2529 bool isLoad = true;
2530 SDValue Ptr;
2531 EVT VT;
2532 unsigned Alignment;
2533 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2534 Ptr = LD->getBasePtr();
2535 VT = LD->getMemoryVT();
2536 Alignment = LD->getAlignment();
2537 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2538 Ptr = ST->getBasePtr();
2539 VT = ST->getMemoryVT();
2540 Alignment = ST->getAlignment();
2541 isLoad = false;
2542 } else
2543 return false;
2544
2545 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2546 // instructions because we can fold these into a more efficient instruction
2547 // (such as LXSD) instead.
2548 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2549 return false;
2550 }
2551
2552 // PowerPC doesn't have preinc load/store instructions for vectors (except
2553 // for QPX, which does have preinc r+r forms).
2554 if (VT.isVector()) {
2555 if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
2556 return false;
2557 } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
2558 AM = ISD::PRE_INC;
2559 return true;
2560 }
2561 }
2562
2563 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2564 // Common code will reject creating a pre-inc form if the base pointer
2565 // is a frame index, or if N is a store and the base pointer is either
2566 // the same as or a predecessor of the value being stored. Check for
2567 // those situations here, and try with swapped Base/Offset instead.
2568 bool Swap = false;
2569
2570 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2571 Swap = true;
2572 else if (!isLoad) {
2573 SDValue Val = cast<StoreSDNode>(N)->getValue();
2574 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2575 Swap = true;
2576 }
2577
2578 if (Swap)
2579 std::swap(Base, Offset);
2580
2581 AM = ISD::PRE_INC;
2582 return true;
2583 }
2584
2585 // LDU/STU can only handle immediates that are a multiple of 4.
2586 if (VT != MVT::i64) {
2587 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0))
2588 return false;
2589 } else {
2590 // LDU/STU need an address with at least 4-byte alignment.
2591 if (Alignment < 4)
2592 return false;
2593
2594 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4))
2595 return false;
2596 }
2597
2598 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2599 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2600 // sext i32 to i64 when addr mode is r+i.
2601 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2602 LD->getExtensionType() == ISD::SEXTLOAD &&
2603 isa<ConstantSDNode>(Offset))
2604 return false;
2605 }
2606
2607 AM = ISD::PRE_INC;
2608 return true;
2609}
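// Illustrative outcome (a sketch, assuming a 4-byte-aligned i32 access): a
// load whose address is (add %r, 16) and which passes the checks above is
// tagged ISD::PRE_INC, so instruction selection can use an update form such
// as "lwzu r4, 16(r3)", which loads from r3+16 and writes r3+16 back into
// r3 in one instruction.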
2610
2611//===----------------------------------------------------------------------===//
2612// LowerOperation implementation
2613//===----------------------------------------------------------------------===//
2614
2615/// Set the HiOpFlags and LoOpFlags to the target MO flags for a label
2616/// reference, using the PIC base and non-lazy-pointer flags when required.
2617static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2618 unsigned &HiOpFlags, unsigned &LoOpFlags,
2619 const GlobalValue *GV = nullptr) {
2620 HiOpFlags = PPCII::MO_HA;
2621 LoOpFlags = PPCII::MO_LO;
2622
2623 // Don't use the PIC base if not in the PIC relocation model.
2624 if (IsPIC) {
2625 HiOpFlags |= PPCII::MO_PIC_FLAG;
2626 LoOpFlags |= PPCII::MO_PIC_FLAG;
2627 }
2628
2629 // If this is a reference to a global value that requires a non-lazy-ptr, make
2630 // sure that instruction lowering adds it.
2631 if (GV && Subtarget.hasLazyResolverStub(GV)) {
2632 HiOpFlags |= PPCII::MO_NLP_FLAG;
2633 LoOpFlags |= PPCII::MO_NLP_FLAG;
2634
2635 if (GV->hasHiddenVisibility()) {
2636 HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2637 LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
2638 }
2639 }
2640}
2641
2642static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2643 SelectionDAG &DAG) {
2644 SDLoc DL(HiPart);
2645 EVT PtrVT = HiPart.getValueType();
2646 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2647
2648 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2649 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2650
2651 // With PIC, the first instruction is actually "GR+hi(&G)".
2652 if (isPIC)
2653 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2654 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2655
2656 // Generate non-pic code that has direct accesses to the constant pool.
2657 // The address of the global is just (hi(&g)+lo(&g)).
2658 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2659}
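// Worked example (illustration): @ha compensates for the sign of the low 16
// bits, i.e. ha(x) == (x + 0x8000) >> 16. For a symbol at 0x1000FFFC,
// lo(x) == -4 and ha(x) == 0x1001, so "addis r, 0, sym@ha" followed by
// "addi r, r, sym@l" computes 0x10010000 - 4 == 0x1000FFFC.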
2660
2661static void setUsesTOCBasePtr(MachineFunction &MF) {
2662 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2663 FuncInfo->setUsesTOCBasePtr();
2664}
2665
2668}
2669
2670static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit,
2671 SDValue GA) {
2672 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2673 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
2674 DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2675
2676 SDValue Ops[] = { GA, Reg };
2677 return DAG.getMemIntrinsicNode(
2678 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2679 MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0,
2680 MachineMemOperand::MOLoad);
2681}
2682
2683SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2684 SelectionDAG &DAG) const {
2685 EVT PtrVT = Op.getValueType();
2686 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2687 const Constant *C = CP->getConstVal();
2688
2689 // 64-bit SVR4 ABI code is always position-independent.
2690 // The actual address of the GlobalValue is stored in the TOC.
2691 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2692 setUsesTOCBasePtr(DAG);
2693 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
2694 return getTOCEntry(DAG, SDLoc(CP), true, GA);
2695 }
2696
2697 unsigned MOHiFlag, MOLoFlag;
2698 bool IsPIC = isPositionIndependent();
2699 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2700
2701 if (IsPIC && Subtarget.isSVR4ABI()) {
2702 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
2703 PPCII::MO_PIC_FLAG);
2704 return getTOCEntry(DAG, SDLoc(CP), false, GA);
2705 }
2706
2707 SDValue CPIHi =
2708 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
2709 SDValue CPILo =
2710 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
2711 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2712}
2713
2714// For 64-bit PowerPC, prefer the more compact relative encodings.
2715// This trades 32 bits per jump table entry for one or two instructions
2716// at the jump site.
2717unsigned PPCTargetLowering::getJumpTableEncoding() const {
2718 if (isJumpTableRelative())
2719 return MachineJumpTableInfo::EK_LabelDifference32;
2720
2721 return TargetLowering::getJumpTableEncoding();
2722}
2723
2724bool PPCTargetLowering::isJumpTableRelative() const {
2725 if (Subtarget.isPPC64())
2726 return true;
2727 return TargetLowering::isJumpTableRelative();
2728}
2729
2730SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2731 SelectionDAG &DAG) const {
2732 if (!Subtarget.isPPC64())
2733 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2734
2735 switch (getTargetMachine().getCodeModel()) {
2736 case CodeModel::Small:
2737 case CodeModel::Medium:
2738 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2739 default:
2740 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
2741 getPointerTy(DAG.getDataLayout()));
2742 }
2743}
2744
2745const MCExpr *
2746PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2747 unsigned JTI,
2748 MCContext &Ctx) const {
2749 if (!Subtarget.isPPC64())
2750 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2751
2752 switch (getTargetMachine().getCodeModel()) {
2753 case CodeModel::Small:
2754 case CodeModel::Medium:
2755 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2756 default:
2757 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2758 }
2759}
2760
2761SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2762 EVT PtrVT = Op.getValueType();
2763 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2764
2765 // 64-bit SVR4 ABI code is always position-independent.
2766 // The actual address of the GlobalValue is stored in the TOC.
2767 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2768 setUsesTOCBasePtr(DAG);
2769 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2770 return getTOCEntry(DAG, SDLoc(JT), true, GA);
2771 }
2772
2773 unsigned MOHiFlag, MOLoFlag;
2774 bool IsPIC = isPositionIndependent();
2775 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2776
2777 if (IsPIC && Subtarget.isSVR4ABI()) {
2778 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2779 PPCII::MO_PIC_FLAG);
2780 return getTOCEntry(DAG, SDLoc(GA), false, GA);
2781 }
2782
2783 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
2784 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
2785 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
2786}
2787
2788SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2789 SelectionDAG &DAG) const {
2790 EVT PtrVT = Op.getValueType();
2791 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2792 const BlockAddress *BA = BASDN->getBlockAddress();
2793
2794 // 64-bit SVR4 ABI code is always position-independent.
2795 // The actual BlockAddress is stored in the TOC.
2796 if (Subtarget.isSVR4ABI() &&
2797 (Subtarget.isPPC64() || isPositionIndependent())) {
2798 if (Subtarget.isPPC64())
2799 setUsesTOCBasePtr(DAG);
2800 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2801 return getTOCEntry(DAG, SDLoc(BASDN), Subtarget.isPPC64(), GA);
2802 }
2803
2804 unsigned MOHiFlag, MOLoFlag;
2805 bool IsPIC = isPositionIndependent();
2806 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2807 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
2808 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
2809 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
2810}
2811
2812SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2813 SelectionDAG &DAG) const {
2814 // FIXME: TLS addresses currently use medium model code sequences,
2815 // which is the most useful form. Eventually support for small and
2816 // large models could be added if users need it, at the cost of
2817 // additional complexity.
2818 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2819 if (DAG.getTarget().useEmulatedTLS())
2820 return LowerToTLSEmulatedModel(GA, DAG);
2821
2822 SDLoc dl(GA);
2823 const GlobalValue *GV = GA->getGlobal();
2824 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2825 bool is64bit = Subtarget.isPPC64();
2826 const Module *M = DAG.getMachineFunction().getFunction().getParent();
2827 PICLevel::Level picLevel = M->getPICLevel();
2828
2829 const TargetMachine &TM = getTargetMachine();
2830 TLSModel::Model Model = TM.getTLSModel(GV);
2831
2832 if (Model == TLSModel::LocalExec) {
2833 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2834 PPCII::MO_TPREL_HA);
2835 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2836 PPCII::MO_TPREL_LO);
2837 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
2838 : DAG.getRegister(PPC::R2, MVT::i32);
2839
2840 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
2841 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
2842 }
2843
2844 if (Model == TLSModel::InitialExec) {
2845 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2846 SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2847 PPCII::MO_TLS);
2848 SDValue GOTPtr;
2849 if (is64bit) {
2850 setUsesTOCBasePtr(DAG);
2851 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2852 GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
2853 PtrVT, GOTReg, TGA);
2854 } else {
2855 if (!TM.isPositionIndependent())
2856 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
2857 else if (picLevel == PICLevel::SmallPIC)
2858 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2859 else
2860 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2861 }
2862 SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
2863 PtrVT, TGA, GOTPtr);
2864 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
2865 }
2866
2867 if (Model == TLSModel::GeneralDynamic) {
2868 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2869 SDValue GOTPtr;
2870 if (is64bit) {
2871 setUsesTOCBasePtr(DAG);
2872 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2873 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
2874 GOTReg, TGA);
2875 } else {
2876 if (picLevel == PICLevel::SmallPIC)
2877 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2878 else
2879 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2880 }
2881 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
2882 GOTPtr, TGA, TGA);
2883 }
2884
2885 if (Model == TLSModel::LocalDynamic) {
2886 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2887 SDValue GOTPtr;
2888 if (is64bit) {
2889 setUsesTOCBasePtr(DAG);
2890 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2891 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
2892 GOTReg, TGA);
2893 } else {
2894 if (picLevel == PICLevel::SmallPIC)
2895 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2896 else
2897 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2898 }
2899 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
2900 PtrVT, GOTPtr, TGA, TGA);
2901 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
2902 PtrVT, TLSAddr, TGA);
2903 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
2904 }
2905
2906 llvm_unreachable("Unknown TLS model!");
2907}
2908
2909SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
2910 SelectionDAG &DAG) const {
2911 EVT PtrVT = Op.getValueType();
2912 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
2913 SDLoc DL(GSDN);
2914 const GlobalValue *GV = GSDN->getGlobal();
2915
2916 // 64-bit SVR4 ABI code is always position-independent.
2917 // The actual address of the GlobalValue is stored in the TOC.
2918 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2919 setUsesTOCBasePtr(DAG);
2920 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
2921 return getTOCEntry(DAG, DL, true, GA);
2922 }
2923
2924 unsigned MOHiFlag, MOLoFlag;
2925 bool IsPIC = isPositionIndependent();
2926 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
2927
2928 if (IsPIC && Subtarget.isSVR4ABI()) {
2929 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
2930 GSDN->getOffset(),
2931 PPCII::MO_PIC_FLAG);
2932 return getTOCEntry(DAG, DL, false, GA);
2933 }
2934
2935 SDValue GAHi =
2936 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
2937 SDValue GALo =
2938 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
2939
2940 SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG);
2941
2942 // If the global reference is actually to a non-lazy-pointer, we have to do an
2943 // extra load to get the address of the global.
2944 if (MOHiFlag & PPCII::MO_NLP_FLAG)
2945 Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2946 return Ptr;
2947}
2948
2949SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
2950 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2951 SDLoc dl(Op);
2952
2953 if (Op.getValueType() == MVT::v2i64) {
2954 // When the operands themselves are v2i64 values, we need to do something
2955 // special because VSX has no underlying comparison operations for these.
2956 if (Op.getOperand(0).getValueType() == MVT::v2i64) {
2957 // Equality can be handled by casting to the legal type for Altivec
2958 // comparisons, everything else needs to be expanded.
2959 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2960 return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
2961 DAG.getSetCC(dl, MVT::v4i32,
2962 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
2963 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
2964 CC));
2965 }
2966
2967 return SDValue();
2968 }
2969
2970 // We handle most of these in the usual way.
2971 return Op;
2972 }
2973
2974 // If we're comparing for equality to zero, expose the fact that this is
2975 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
2976 // fold the new nodes.
2977 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
2978 return V;
2979
2980 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2981 // Leave comparisons against 0 and -1 alone for now, since they're usually
2982 // optimized. FIXME: revisit this when we can custom lower all setcc
2983 // optimizations.
2984 if (C->isAllOnesValue() || C->isNullValue())
2985 return SDValue();
2986 }
2987
2988 // If we have an integer seteq/setne, turn it into a compare against zero
2989 // by xor'ing the rhs with the lhs, which is faster than setting a
2990 // condition register, reading it back out, and masking the correct bit. The
2991 // normal approach here uses sub to do this instead of xor. Using xor exposes
2992 // the result to other bit-twiddling opportunities.
2993 EVT LHSVT = Op.getOperand(0).getValueType();
2994 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2995 EVT VT = Op.getValueType();
2996 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
2997 Op.getOperand(1));
2998 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
2999 }
3000 return SDValue();
3001}
3002
3003SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3004 SDNode *Node = Op.getNode();
3005 EVT VT = Node->getValueType(0);
3006 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3007 SDValue InChain = Node->getOperand(0);
3008 SDValue VAListPtr = Node->getOperand(1);
3009 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3010 SDLoc dl(Node);
3011
3012 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3013
3014 // gpr_index
3015 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3016 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3017 InChain = GprIndex.getValue(1);
3018
3019 if (VT == MVT::i64) {
3020 // Check if GprIndex is even
3021 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3022 DAG.getConstant(1, dl, MVT::i32));
3023 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3024 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3025 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3026 DAG.getConstant(1, dl, MVT::i32));
3027 // Align GprIndex to be even if it isn't
3028 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3029 GprIndex);
3030 }
3031
3032 // fpr index is 1 byte after gpr
3033 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3034 DAG.getConstant(1, dl, MVT::i32));
3035
3036 // fpr
3037 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3038 FprPtr, MachinePointerInfo(SV), MVT::i8);
3039 InChain = FprIndex.getValue(1);
3040
3041 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3042 DAG.getConstant(8, dl, MVT::i32));
3043
3044 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3045 DAG.getConstant(4, dl, MVT::i32));
3046
3047 // areas
3048 SDValue OverflowArea =
3049 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3050 InChain = OverflowArea.getValue(1);
3051
3052 SDValue RegSaveArea =
3053 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3054 InChain = RegSaveArea.getValue(1);
3055
3056 // select overflow_area if index >= 8
3057 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3058 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3059
3060 // adjustment constant gpr_index * 4/8
3061 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3062 VT.isInteger() ? GprIndex : FprIndex,
3063 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3064 MVT::i32));
3065
3066 // OurReg = RegSaveArea + RegConstant
3067 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3068 RegConstant);
3069
3070 // Floating types are 32 bytes into RegSaveArea
3071 if (VT.isFloatingPoint())
3072 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3073 DAG.getConstant(32, dl, MVT::i32));
3074
3075 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3076 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3077 VT.isInteger() ? GprIndex : FprIndex,
3078 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3079 MVT::i32));
3080
3081 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3082 VT.isInteger() ? VAListPtr : FprPtr,
3083 MachinePointerInfo(SV), MVT::i8);
3084
3085 // determine if we should load from reg_save_area or overflow_area
3086 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3087
3088 // increase overflow_area by 4/8 if gpr/fpr >= 8
3089 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3090 DAG.getConstant(VT.isInteger() ? 4 : 8,
3091 dl, MVT::i32));
3092
3093 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3094 OverflowAreaPlusN);
3095
3096 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3097 MachinePointerInfo(), MVT::i32);
3098
3099 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3100}
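// The offsets hard-coded above mirror the 32-bit SVR4 va_list layout: gpr at
// byte 0, fpr at byte 1, overflow_arg_area at byte 4, and reg_save_area at
// byte 8, with FPRs stored 32 bytes (8 GPRs * 4 bytes) into the register
// save area.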
3101
3102SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3103 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3104
3105 // We have to copy the entire va_list struct:
3106 // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
3107 return DAG.getMemcpy(Op.getOperand(0), Op,
3108 Op.getOperand(1), Op.getOperand(2),
3109 DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
3110 false, MachinePointerInfo(), MachinePointerInfo());
3111}
3112
3113SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3114 SelectionDAG &DAG) const {
3115 return Op.getOperand(0);
3116}
3117
3118SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3119 SelectionDAG &DAG) const {
3120 SDValue Chain = Op.getOperand(0);
3121 SDValue Trmp = Op.getOperand(1); // trampoline
3122 SDValue FPtr = Op.getOperand(2); // nested function
3123 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3124 SDLoc dl(Op);
3125
3126 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3127 bool isPPC64 = (PtrVT == MVT::i64);
3128 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3129
3132
3133 Entry.Ty = IntPtrTy;
3134 Entry.Node = Trmp; Args.push_back(Entry);
3135
3136 // TrampSize == (isPPC64 ? 48 : 40);
3137 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3138 isPPC64 ? MVT::i64 : MVT::i32);
3139 Args.push_back(Entry);
3140
3141 Entry.Node = FPtr; Args.push_back(Entry);
3142 Entry.Node = Nest; Args.push_back(Entry);
3143
3144 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3145 TargetLowering::CallLoweringInfo CLI(DAG);
3146 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3147 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3148 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3149
3150 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3151 return CallResult.second;
3152}
3153
3154SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3155 MachineFunction &MF = DAG.getMachineFunction();
3156 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3157 EVT PtrVT = getPointerTy(MF.getDataLayout());
3158
3159 SDLoc dl(Op);
3160
3161 if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
3162 // vastart just stores the address of the VarArgsFrameIndex slot into the
3163 // memory location argument.
3164 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3165 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3166 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3167 MachinePointerInfo(SV));
3168 }
3169
3170 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3171 // We assume the given va_list has already been allocated.
3172 //
3173 // typedef struct {
3174 // char gpr; /* index into the array of 8 GPRs
3175 // * stored in the register save area
3176 // * gpr=0 corresponds to r3,
3177 // * gpr=1 to r4, etc.
3178 // */
3179 // char fpr; /* index into the array of 8 FPRs
3180 // * stored in the register save area
3181 // * fpr=0 corresponds to f1,
3182 // * fpr=1 to f2, etc.
3183 // */
3184 // char *overflow_arg_area;
3185 // /* location on stack that holds
3186 // * the next overflow argument
3187 // */
3188 // char *reg_save_area;
3189 // /* where r3:r10 and f1:f8 (if saved)
3190 // * are stored
3191 // */
3192 // } va_list[1];
3193
3194 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3195 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3196 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3197 PtrVT);
3198 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3199 PtrVT);
3200
3201 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3202 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3203
3204 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3205 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3206
3207 uint64_t FPROffset = 1;
3208 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3209
3210 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3211
3212 // Store first byte : number of int regs
3213 SDValue firstStore =
3214 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3215 MachinePointerInfo(SV), MVT::i8);
3216 uint64_t nextOffset = FPROffset;
3217 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3218 ConstFPROffset);
3219
3220 // Store second byte : number of float regs
3221 SDValue secondStore =
3222 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3223 MachinePointerInfo(SV, nextOffset), MVT::i8);
3224 nextOffset += StackOffset;
3225 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3226
3227 // Store second word : arguments given on stack
3228 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3229 MachinePointerInfo(SV, nextOffset));
3230 nextOffset += FrameOffset;
3231 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3232
3233 // Store third word : arguments given in registers
3234 return DAG.getStore(thirdStore, dl, FR, nextPtr,
3235 MachinePointerInfo(SV, nextOffset));
3236}
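// Worked example (PPC32, PtrVT == i32): FPROffset == 1, StackOffset == 3,
// and FrameOffset == 4, so the four fields land at byte offsets 0 (gpr),
// 1 (fpr), 4 (overflow_arg_area), and 8 (reg_save_area), matching the layout
// consumed by LowerVAARG above.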
3237
3238/// FPR - The set of FP registers that should be allocated for arguments,
3239/// on Darwin.
3240static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
3241 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
3242 PPC::F11, PPC::F12, PPC::F13};
3243
3244/// QFPR - The set of QPX registers that should be allocated for arguments.
3245static const MCPhysReg QFPR[] = {
3246 PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
3247 PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
3248
3249/// CalculateStackSlotSize - Calculates the size reserved for this argument on
3250/// the stack.
3251static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3252 unsigned PtrByteSize) {
3253 unsigned ArgSize = ArgVT.getStoreSize();
3254 if (Flags.isByVal())
3255 ArgSize = Flags.getByValSize();
3256
3257 // Round up to multiples of the pointer size, except for array members,
3258 // which are always packed.
3259 if (!Flags.isInConsecutiveRegs())
3260 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3261
3262 return ArgSize;
3263}
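// Worked example (illustration): a byval aggregate of 10 bytes with
// PtrByteSize == 8 reserves ((10 + 7) / 8) * 8 == 16 bytes, while an f32
// member of a split array (isInConsecutiveRegs) keeps its 4-byte store size.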
3264
3265/// CalculateStackSlotAlignment - Calculates the alignment of this argument
3266/// on the stack.
3267static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3268 ISD::ArgFlagsTy Flags,
3269 unsigned PtrByteSize) {
3270 unsigned Align = PtrByteSize;
3271
3272 // Altivec parameters are padded to a 16 byte boundary.
3273 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3274 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3275 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3276 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3277 Align = 16;
3278 // QPX vector types stored in double-precision are padded to a 32 byte
3279 // boundary.
3280 else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
3281 Align = 32;
3282
3283 // ByVal parameters are aligned as requested.
3284 if (Flags.isByVal()) {
3285 unsigned BVAlign = Flags.getByValAlign();
3286 if (BVAlign > PtrByteSize) {
3287 if (BVAlign % PtrByteSize != 0)
3288 report_fatal_error(
3289 "ByVal alignment is not a multiple of the pointer size");
3290
3291 Align = BVAlign;
3292 }
3293 }
3294
3295 // Array members are always packed to their original alignment.
3296 if (Flags.isInConsecutiveRegs()) {
3297 // If the array member was split into multiple registers, the first
3298 // needs to be aligned to the size of the full type. (Except for
3299 // ppcf128, which is only aligned as its f64 components.)
3300 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3301 Align = OrigVT.getStoreSize();
3302 else
3303 Align = ArgVT.getStoreSize();
3304 }
3305
3306 return Align;
3307}
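// Worked example (illustration): a v4i32 Altivec parameter aligns to 16
// bytes; a byval with a requested 32-byte alignment on an 8-byte-pointer
// target aligns to 32; and an f64 member of a split ppcf128 array stays at
// 8-byte alignment rather than the full 16-byte type size.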
3308
3309/// CalculateStackSlotUsed - Return whether this argument will use its
3310/// stack slot (instead of being passed in registers). ArgOffset,
3311/// AvailableFPRs, and AvailableVRs must hold the current argument
3312/// position, and will be updated to account for this argument.
3313static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
3314 ISD::ArgFlagsTy Flags,
3315 unsigned PtrByteSize,
3316 unsigned LinkageSize,
3317 unsigned ParamAreaSize,
3318 unsigned &ArgOffset,
3319 unsigned &AvailableFPRs,
3320 unsigned &AvailableVRs, bool HasQPX) {
3321 bool UseMemory = false;
3322
3323 // Respect alignment of argument on the stack.
3324 unsigned Align =
3325 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3326 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3327 // If there's no space left in the argument save area, we must
3328 // use memory (this check also catches zero-sized arguments).
3329 if (ArgOffset >= LinkageSize + ParamAreaSize)
3330 UseMemory = true;
3331
3332 // Allocate argument on the stack.
3333 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3334 if (Flags.isInConsecutiveRegsLast())
3335 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3336 // If we overran the argument save area, we must use memory
3337 // (this check catches arguments passed partially in memory)
3338 if (ArgOffset > LinkageSize + ParamAreaSize)
3339 UseMemory = true;
3340
3341 // However, if the argument is actually passed in an FPR or a VR,
3342 // we don't use memory after all.
3343 if (!Flags.isByVal()) {
3344 if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
3345 // QPX registers overlap with the scalar FP registers.
3346 (HasQPX && (ArgVT == MVT::v4f32 ||
3347 ArgVT == MVT::v4f64 ||
3348 ArgVT == MVT::v4i1)))
3349 if (AvailableFPRs > 0) {
3350 --AvailableFPRs;
3351 return false;
3352 }
3353 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3354 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3355 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3356 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3357 if (AvailableVRs > 0) {
3358 --AvailableVRs;
3359 return false;
3360 }
3361 }
3362
3363 return UseMemory;
3364}
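// Worked example (ELFv2, illustration): with PtrByteSize == 8,
// LinkageSize == 32, and ParamAreaSize == 64, the ninth i64 argument starts
// at ArgOffset == 96 == LinkageSize + ParamAreaSize; i64 is neither an FPR
// nor a VR candidate, so the stack slot is used and HasParameterArea is
// forced in LowerFormalArguments_64SVR4 below.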
3365
3366/// EnsureStackAlignment - Round stack frame size up from NumBytes to
3367/// ensure minimum alignment required for target.
3369 unsigned NumBytes) {
3370 unsigned TargetAlign = Lowering->getStackAlignment();
3371 unsigned AlignMask = TargetAlign - 1;
3372 NumBytes = (NumBytes + AlignMask) & ~AlignMask;
3373 return NumBytes;
3374}
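// Worked example (illustration): with a 16-byte target stack alignment,
// AlignMask == 15 and a 100-byte frame rounds up to (100 + 15) & ~15 == 112.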
3375
3376SDValue PPCTargetLowering::LowerFormalArguments(
3377 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3378 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3379 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3380 if (Subtarget.isSVR4ABI()) {
3381 if (Subtarget.isPPC64())
3382 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
3383 dl, DAG, InVals);
3384 else
3385 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
3386 dl, DAG, InVals);
3387 } else {
3388 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
3389 dl, DAG, InVals);
3390 }
3391}
3392
3393SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3394 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3395 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3396 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3397
3398 // 32-bit SVR4 ABI Stack Frame Layout:
3399 // +-----------------------------------+
3400 // +--> | Back chain |
3401 // | +-----------------------------------+
3402 // | | Floating-point register save area |
3403 // | +-----------------------------------+
3404 // | | General register save area |
3405 // | +-----------------------------------+
3406 // | | CR save word |
3407 // | +-----------------------------------+
3408 // | | VRSAVE save word |
3409 // | +-----------------------------------+
3410 // | | Alignment padding |
3411 // | +-----------------------------------+
3412 // | | Vector register save area |
3413 // | +-----------------------------------+
3414 // | | Local variable space |
3415 // | +-----------------------------------+
3416 // | | Parameter list area |
3417 // | +-----------------------------------+
3418 // | | LR save word |
3419 // | +-----------------------------------+
3420 // SP--> +--- | Back chain |
3421 // +-----------------------------------+
3422 //
3423 // Specifications:
3424 // System V Application Binary Interface PowerPC Processor Supplement
3425 // AltiVec Technology Programming Interface Manual
3426
3427 MachineFunction &MF = DAG.getMachineFunction();
3428 MachineFrameInfo &MFI = MF.getFrameInfo();
3429 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3430
3431 EVT PtrVT = getPointerTy(MF.getDataLayout());
3432 // Potential tail calls could cause overwriting of argument stack slots.
3433 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3434 (CallConv == CallingConv::Fast));
3435 unsigned PtrByteSize = 4;
3436
3437 // Assign locations to all of the incoming arguments.
3438 SmallVector<CCValAssign, 16> ArgLocs;
3439 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3440 *DAG.getContext());
3441
3442 // Reserve space for the linkage area on the stack.
3443 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3444 CCInfo.AllocateStack(LinkageSize, PtrByteSize);
3445 if (useSoftFloat())
3446 CCInfo.PreAnalyzeFormalArguments(Ins);
3447
3448 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3449 CCInfo.clearWasPPCF128();
3450
3451 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3452 CCValAssign &VA = ArgLocs[i];
3453
3454 // Arguments stored in registers.
3455 if (VA.isRegLoc()) {
3456 const TargetRegisterClass *RC;
3457 EVT ValVT = VA.getValVT();
3458
3459 switch (ValVT.getSimpleVT().SimpleTy) {
3460 default:
3461 llvm_unreachable("ValVT not supported by formal arguments Lowering");
3462 case MVT::i1:
3463 case MVT::i32:
3464 RC = &PPC::GPRCRegClass;
3465 break;
3466 case MVT::f32:
3467 if (Subtarget.hasP8Vector())
3468 RC = &PPC::VSSRCRegClass;
3469 else if (Subtarget.hasSPE())
3470 RC = &PPC::SPE4RCRegClass;
3471 else
3472 RC = &PPC::F4RCRegClass;
3473 break;
3474 case MVT::f64:
3475 if (Subtarget.hasVSX())
3476 RC = &PPC::VSFRCRegClass;
3477 else if (Subtarget.hasSPE())
3478 // SPE passes doubles in GPR pairs.
3479 RC = &PPC::GPRCRegClass;
3480 else
3481 RC = &PPC::F8RCRegClass;
3482 break;
3483 case MVT::v16i8:
3484 case MVT::v8i16:
3485 case MVT::v4i32:
3486 RC = &PPC::VRRCRegClass;
3487 break;
3488 case MVT::v4f32:
3489 RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
3490 break;
3491 case MVT::v2f64:
3492 case MVT::v2i64:
3493 RC = &PPC::VRRCRegClass;
3494 break;
3495 case MVT::v4f64:
3496 RC = &PPC::QFRCRegClass;
3497 break;
3498 case MVT::v4i1:
3499 RC = &PPC::QBRCRegClass;
3500 break;
3501 }
3502
3503 SDValue ArgValue;
3504 // Transform the arguments stored in physical registers into
3505 // virtual ones.
3506 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
3507 assert(i + 1 < e && "No second half of double precision argument");
3508 unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
3509 unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
3510 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
3511 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
3512 if (!Subtarget.isLittleEndian())
3513 std::swap (ArgValueLo, ArgValueHi);
3514 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
3515 ArgValueHi);
3516 } else {
3517 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3518 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3519 ValVT == MVT::i1 ? MVT::i32 : ValVT);
3520 if (ValVT == MVT::i1)
3521 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3522 }
3523
3524 InVals.push_back(ArgValue);
3525 } else {
3526 // Argument stored in memory.
3527 assert(VA.isMemLoc());
3528
3529 // Get the extended size of the argument type on the stack.
3530 unsigned ArgSize = VA.getLocVT().getStoreSize();
3531 // Get the actual size of the argument type
3532 unsigned ObjSize = VA.getValVT().getStoreSize();
3533 unsigned ArgOffset = VA.getLocMemOffset();
3534 // Stack objects in PPC32 are right justified.
3535 ArgOffset += ArgSize - ObjSize;
3536 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
3537
3538 // Create load nodes to retrieve arguments from the stack.
3539 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3540 InVals.push_back(
3541 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3542 }
3543 }
3544
3545 // Assign locations to all of the incoming aggregate by value arguments.
3546 // Aggregates passed by value are stored in the local variable space of the
3547 // caller's stack frame, right above the parameter list area.
3548 SmallVector<CCValAssign, 16> ByValArgLocs;
3549 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3550 ByValArgLocs, *DAG.getContext());
3551
3552 // Reserve stack space for the allocations in CCInfo.
3553 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
3554
3555 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3556
3557 // Area that is at least reserved in the caller of this function.
3558 unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3559 MinReservedArea = std::max(MinReservedArea, LinkageSize);
3560
3561 // Set the size that is at least reserved in caller of this function. Tail
3562 // call optimized function's reserved stack space needs to be aligned so that
3563 // taking the difference between two stack areas will result in an aligned
3564 // stack.
3565 MinReservedArea =
3566 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3567 FuncInfo->setMinReservedArea(MinReservedArea);
3568
3569 SmallVector<SDValue, 8> MemOps;
3570
3571 // If the function takes variable number of arguments, make a frame index for
3572 // the start of the first vararg value... for expansion of llvm.va_start.
3573 if (isVarArg) {
3574 static const MCPhysReg GPArgRegs[] = {
3575 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3576 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3577 };
3578 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3579
3580 static const MCPhysReg FPArgRegs[] = {
3581 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3582 PPC::F8
3583 };
3584 unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3585
3586 if (useSoftFloat() || hasSPE())
3587 NumFPArgRegs = 0;
3588
3589 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3590 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3591
3592 // Make room for NumGPArgRegs and NumFPArgRegs.
3593 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3594 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3595
3596 FuncInfo->setVarArgsStackOffset(
3597 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3598 CCInfo.getNextStackOffset(), true));
3599
3600 FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false));
3601 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3602
3603 // The fixed integer arguments of a variadic function are stored to the
3604 // VarArgsFrameIndex on the stack so that they may be loaded by
3605 // dereferencing the result of va_next.
3606 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3607 // Get an existing live-in vreg, or add a new one.
3608 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3609 if (!VReg)
3610 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3611
3612 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3613 SDValue Store =
3614 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3615 MemOps.push_back(Store);
3616 // Increment the address by four for the next argument to store
3617 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3618 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3619 }
3620
3621 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3622 // is set.
3623 // The double arguments are stored to the VarArgsFrameIndex
3624 // on the stack.
3625 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
3626 // Get an existing live-in vreg, or add a new one.
3627 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3628 if (!VReg)
3629 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3630
3631 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3632 SDValue Store =
3633 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3634 MemOps.push_back(Store);
3635 // Increment the address by eight for the next argument to store
3636 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3637 PtrVT);
3638 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3639 }
3640 }
3641
3642 if (!MemOps.empty())
3643 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3644
3645 return Chain;
3646}
3647
3648// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3649// value to MVT::i64 and then truncate to the correct register size.
3650SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3651 EVT ObjectVT, SelectionDAG &DAG,
3652 SDValue ArgVal,
3653 const SDLoc &dl) const {
3654 if (Flags.isSExt())
3655 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3656 DAG.getValueType(ObjectVT));
3657 else if (Flags.isZExt())
3658 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3659 DAG.getValueType(ObjectVT));
3660
3661 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3662}
3663
3664SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3665 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3666 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3667 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3668 // TODO: add description of PPC stack frame format, or at least some docs.
3669 //
3670 bool isELFv2ABI = Subtarget.isELFv2ABI();
3671 bool isLittleEndian = Subtarget.isLittleEndian();
3672 MachineFunction &MF = DAG.getMachineFunction();
3673 MachineFrameInfo &MFI = MF.getFrameInfo();
3674 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3675
3676 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3677 "fastcc not supported on varargs functions");
3678
3679 EVT PtrVT = getPointerTy(MF.getDataLayout());
3680 // Potential tail calls could cause overwriting of argument stack slots.
3681 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3682 (CallConv == CallingConv::Fast));
3683 unsigned PtrByteSize = 8;
3684 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3685
3686 static const MCPhysReg GPR[] = {
3687 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3688 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3689 };
3690 static const MCPhysReg VR[] = {
3691 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3692 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3693 };
3694
3695 const unsigned Num_GPR_Regs = array_lengthof(GPR);
3696 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3697 const unsigned Num_VR_Regs = array_lengthof(VR);
3698 const unsigned Num_QFPR_Regs = Num_FPR_Regs;
3699
3700 // Do a first pass over the arguments to determine whether the ABI
3701 // guarantees that our caller has allocated the parameter save area
3702 // on its stack frame. In the ELFv1 ABI, this is always the case;
3703 // in the ELFv2 ABI, it is true if this is a vararg function or if
3704 // any parameter is located in a stack slot.
3705
3706 bool HasParameterArea = !isELFv2ABI || isVarArg;
3707 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
3708 unsigned NumBytes = LinkageSize;
3709 unsigned AvailableFPRs = Num_FPR_Regs;
3710 unsigned AvailableVRs = Num_VR_Regs;
3711 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3712 if (Ins[i].Flags.isNest())
3713 continue;
3714
3715 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
3716 PtrByteSize, LinkageSize, ParamAreaSize,
3717 NumBytes, AvailableFPRs, AvailableVRs,
3718 Subtarget.hasQPX()))
3719 HasParameterArea = true;
3720 }
3721
3722 // Add DAG nodes to load the arguments or copy them out of registers. On
3723 // entry to a function on PPC, the arguments start after the linkage area,
3724 // although the first ones are often in registers.
3725
3726 unsigned ArgOffset = LinkageSize;
3727 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3728 unsigned &QFPR_idx = FPR_idx;
3729 SmallVector<SDValue, 8> MemOps;
3730 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
3731 unsigned CurArgIdx = 0;
3732 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3733 SDValue ArgVal;
3734 bool needsLoad = false;
3735 EVT ObjectVT = Ins[ArgNo].VT;
3736 EVT OrigVT = Ins[ArgNo].ArgVT;
3737 unsigned ObjSize = ObjectVT.getStoreSize();
3738 unsigned ArgSize = ObjSize;
3739 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3740 if (Ins[ArgNo].isOrigArg()) {
3741 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3742 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3743 }
3744 // We re-align the argument offset for each argument, except when using the
3745 // fast calling convention, where we do that only when the argument will
3746 // actually use a stack slot.
3747 unsigned CurArgOffset, Align;
3748 auto ComputeArgOffset = [&]() {
3749 /* Respect alignment of argument on the stack. */
3750 Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
3751 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3752 CurArgOffset = ArgOffset;
3753 };
3754
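// The round-up inside ComputeArgOffset is the usual align-to-boundary
// idiom. A small worked case (illustrative only): ArgOffset == 40 and a
// 16-byte-aligned vector argument give
//   ArgOffset = ((40 + 16 - 1) / 16) * 16 == 48
// so CurArgOffset records the next 16-byte boundary, and GPR_idx is then
// recomputed from the doublewords consumed so far.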
3755 if (CallConv != CallingConv::Fast) {
3756 ComputeArgOffset();
3757
3758 /* Compute GPR index associated with argument offset. */
3759 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3760 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
3761 }
3762
3763 // FIXME the codegen can be much improved in some cases.
3764 // We do not have to keep everything in memory.
3765 if (Flags.isByVal()) {
3766 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3767
3768 if (CallConv == CallingConv::Fast)
3769 ComputeArgOffset();
3770
3771 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
3772 ObjSize = Flags.getByValSize();
3773 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3774 // Empty aggregate parameters do not take up registers. Examples:
3775 // struct { } a;
3776 // union { } b;
3777 // int c[0];
3778 // etc. However, we have to provide a place-holder in InVals, so
3779 // pretend we have an 8-byte item at the current address for that
3780 // purpose.
3781 if (!ObjSize) {
3782 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
3783 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3784 InVals.push_back(FIN);
3785 continue;
3786 }
3787
3788 // Create a stack object covering all stack doublewords occupied
3789 // by the argument. If the argument is (fully or partially) on
3790 // the stack, or if the argument is fully in registers but the
3791 // caller has allocated the parameter save anyway, we can refer
3792 // directly to the caller's stack frame. Otherwise, create a
3793 // local copy in our own frame.
3794 int FI;
3795 if (HasParameterArea ||
3796 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
3797 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
3798 else
3799 FI = MFI.CreateStackObject(ArgSize, Align, false);
3800 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3801
3802 // Handle aggregates smaller than 8 bytes.
3803 if (ObjSize < PtrByteSize) {
3804 // The value of the object is its address, which differs from the
3805 // address of the enclosing doubleword on big-endian systems.
3806 SDValue Arg = FIN;
3807 if (!isLittleEndian) {
3808 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
3809 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
3810 }
3811 InVals.push_back(Arg);
3812
3813 if (GPR_idx != Num_GPR_Regs) {
3814 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3815 FuncInfo->addLiveInAttr(VReg, Flags);
3816 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3817 SDValue Store;
3818
3819 if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
3820 EVT ObjType = (ObjSize == 1 ? MVT::i8 :
3821 (ObjSize == 2 ? MVT::i16 : MVT::i32));
3822 Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
3823 MachinePointerInfo(&*FuncArg), ObjType);
3824 } else {
3825 // For sizes that don't fit a truncating store (3, 5, 6, 7),
3826 // store the whole register as-is to the parameter save area
3827 // slot.
3828 Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3829 MachinePointerInfo(&*FuncArg));
3830 }
3831
3832 MemOps.push_back(Store);
3833 }
3834 // Whether we copied from a register or not, advance the offset
3835 // into the parameter save area by a full doubleword.
3836 ArgOffset += PtrByteSize;
3837 continue;
3838 }
3839
3840 // The value of the object is its address, which is the address of
3841 // its first stack doubleword.
3842 InVals.push_back(FIN);
3843
3844 // Store whatever pieces of the object are in registers to memory.
3845 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
3846 if (GPR_idx == Num_GPR_Regs)
3847 break;
3848
3849 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3850 FuncInfo->addLiveInAttr(VReg, Flags);
3851 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3852 SDValue Addr = FIN;
3853 if (j) {
3854 SDValue Off = DAG.getConstant(j, dl, PtrVT);
3855 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
3856 }
3857 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
3858 MachinePointerInfo(&*FuncArg, j));
3859 MemOps.push_back(Store);
3860 ++GPR_idx;
3861 }
3862 ArgOffset += ArgSize;
3863 continue;
3864 }
3865
3866 switch (ObjectVT.getSimpleVT().SimpleTy) {
3867 default: llvm_unreachable("Unhandled argument type!");
3868 case MVT::i1:
3869 case MVT::i32:
3870 case MVT::i64:
3871 if (Flags.isNest()) {
3872 // The 'nest' parameter, if any, is passed in R11.
3873 unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
3874 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3875
3876 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3877 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3878
3879 break;
3880 }
3881
3882 // These can be scalar arguments or elements of an integer array type
3883 // passed directly. Clang may use those instead of "byval" aggregate
3884 // types to avoid forcing arguments to memory unnecessarily.
3885 if (GPR_idx != Num_GPR_Regs) {
3886 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3887 FuncInfo->addLiveInAttr(VReg, Flags);
3888 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3889
3890 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3891 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3892 // value to MVT::i64 and then truncate to the correct register size.
3893 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3894 } else {
3895 if (CallConv == CallingConv::Fast)
3896 ComputeArgOffset();
3897
3898 needsLoad = true;
3899 ArgSize = PtrByteSize;
3900 }
3901 if (CallConv != CallingConv::Fast || needsLoad)
3902 ArgOffset += 8;
3903 break;
3904
3905 case MVT::f32:
3906 case MVT::f64:
3907 // These can be scalar arguments or elements of a float array type
3908 // passed directly. The latter are used to implement ELFv2 homogeneous
3909 // float aggregates.
3910 if (FPR_idx != Num_FPR_Regs) {
3911 unsigned VReg;
3912
3913 if (ObjectVT == MVT::f32)
3914 VReg = MF.addLiveIn(FPR[FPR_idx],
3915 Subtarget.hasP8Vector()
3916 ? &PPC::VSSRCRegClass
3917 : &PPC::F4RCRegClass);
3918 else
3919 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
3920 ? &PPC::VSFRCRegClass
3921 : &PPC::F8RCRegClass);
3922
3923 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3924 ++FPR_idx;
3925 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
3926 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
3927 // once we support fp <-> gpr moves.
3928
3929 // This can only ever happen in the presence of f32 array types,
3930 // since otherwise we never run out of FPRs before running out
3931 // of GPRs.
3932 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3933 FuncInfo->addLiveInAttr(VReg, Flags);
3934 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3935
3936 if (ObjectVT == MVT::f32) {
3937 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
3938 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
3939 DAG.getConstant(32, dl, MVT::i32));
3940 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
3941 }
3942
3943 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
3944 } else {
3945 if (CallConv == CallingConv::Fast)
3946 ComputeArgOffset();
3947
3948 needsLoad = true;
3949 }
3950
3951 // When passing an array of floats, the array occupies consecutive
3952 // space in the argument area; only round up to the next doubleword
3953 // at the end of the array. Otherwise, each float takes 8 bytes.
3954 if (CallConv != CallingConv::Fast || needsLoad) {
3955 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
3956 ArgOffset += ArgSize;
3957 if (Flags.isInConsecutiveRegsLast())
3958 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3959 }
3960 break;
3961 case MVT::v4f32:
3962 case MVT::v4i32:
3963 case MVT::v8i16:
3964 case MVT::v16i8:
3965 case MVT::v2f64:
3966 case MVT::v2i64:
3967 case MVT::v1i128:
3968 case MVT::f128:
3969 if (!Subtarget.hasQPX()) {
3970 // These can be scalar arguments or elements of a vector array type
3971 // passed directly. The latter are used to implement ELFv2 homogeneous
3972 // vector aggregates.
3973 if (VR_idx != Num_VR_Regs) {
3974 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
3975 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3976 ++VR_idx;
3977 } else {
3978 if (CallConv == CallingConv::Fast)
3979 ComputeArgOffset();
3980 needsLoad = true;
3981 }
3982 if (CallConv != CallingConv::Fast || needsLoad)
3983 ArgOffset += 16;
3984 break;
3985 } // not QPX
3986
3987 assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
3988 "Invalid QPX parameter type");
3989 LLVM_FALLTHROUGH;
3990
3991 case MVT::v4f64:
3992 case MVT::v4i1:
3993 // QPX vectors are treated like their scalar floating-point subregisters
3994 // (except that they're larger).
3995 unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
3996 if (QFPR_idx != Num_QFPR_Regs) {
3997 const TargetRegisterClass *RC;
3998 switch (ObjectVT.getSimpleVT().SimpleTy) {
3999 case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
4000 case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
4001 default: RC = &PPC::QBRCRegClass; break;
4002 }
4003
4004 unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
4005 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4006 ++QFPR_idx;
4007 } else {
4008 if (CallConv == CallingConv::Fast)
4009 ComputeArgOffset();
4010 needsLoad = true;
4011 }
4012 if (CallConv != CallingConv::Fast || needsLoad)
4013 ArgOffset += Sz;
4014 break;
4015 }
4016
4017 // We need to load the argument to a virtual register if we determined
4018 // above that we ran out of physical registers of the appropriate type.
4019 if (needsLoad) {
4020 if (ObjSize < ArgSize && !isLittleEndian)
4021 CurArgOffset += ArgSize - ObjSize;
4022 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4023 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4024 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4025 }
4026
4027 InVals.push_back(ArgVal);
4028 }
4029
4030 // Area that is at least reserved in the caller of this function.
4031 unsigned MinReservedArea;
4032 if (HasParameterArea)
4033 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4034 else
4035 MinReservedArea = LinkageSize;
4036
4037 // Set the size that is at least reserved in caller of this function. Tail
4038 // call optimized functions' reserved stack space needs to be aligned so that
4039 // taking the difference between two stack areas will result in an aligned
4040 // stack.
4041 MinReservedArea =
4042 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4043 FuncInfo->setMinReservedArea(MinReservedArea);
4044
4045 // If the function takes variable number of arguments, make a frame index for
4046 // the start of the first vararg value... for expansion of llvm.va_start.
4047 if (isVarArg) {
4048 int Depth = ArgOffset;
4049
4050 FuncInfo->setVarArgsFrameIndex(
4051 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4052 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4053
4054 // If this function is vararg, store any remaining integer argument regs
4055 // to their spots on the stack so that they may be loaded by dereferencing
4056 // the result of va_next.
4057 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4058 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4059 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4060 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4061 SDValue Store =
4062 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4063 MemOps.push_back(Store);
4064 // Increment the address by four for the next argument to store
4065 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4066 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4067 }
4068 }
4069
4070 if (!MemOps.empty())
4071 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4072
4073 return Chain;
4074}
4075
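// To summarize the ELFv2 behaviour implemented above with a sketch
// (hypothetical C prototypes, not taken from the source):
//
//   void few(long a, long b);                 // all in GPRs, no save area
//   void many(long, long, long, long,
//             long, long, long, long, long);  // 9th arg: save area needed
//   void var(int n, ...);                     // varargs: always needed
//
// Only in the first case may MinReservedArea shrink to LinkageSize.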
4076SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
4077 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4078 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4079 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4080 // TODO: add description of PPC stack frame format, or at least some docs.
4081 //
4082 MachineFunction &MF = DAG.getMachineFunction();
4083 MachineFrameInfo &MFI = MF.getFrameInfo();
4084 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4085
4086 EVT PtrVT = getPointerTy(MF.getDataLayout());
4087 bool isPPC64 = PtrVT == MVT::i64;
4088 // Potential tail calls could cause overwriting of argument stack slots.
4089 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4090 (CallConv == CallingConv::Fast));
4091 unsigned PtrByteSize = isPPC64 ? 8 : 4;
4092 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4093 unsigned ArgOffset = LinkageSize;
4094 // Area that is at least reserved in caller of this function.
4095 unsigned MinReservedArea = ArgOffset;
4096
4097 static const MCPhysReg GPR_32[] = { // 32-bit registers.
4098 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4099 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4100 };
4101 static const MCPhysReg GPR_64[] = { // 64-bit registers.
4102 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4103 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4104 };
4105 static const MCPhysReg VR[] = {
4106 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4107 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4108 };
4109
4110 const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
4111 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4112 const unsigned Num_VR_Regs = array_lengthof(VR);
4113
4114 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4115
4116 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
4117
4118 // In 32-bit non-varargs functions, the stack space for vectors is after the
4119 // stack space for non-vectors. We do not use this space unless we have
4120 // too many vectors to fit in registers, something that only occurs in
4121 // constructed examples, but we have to walk the arglist to figure
4122 // that out. For the pathological case, compute VecArgOffset as the
4123 // start of the vector parameter area. Computing VecArgOffset is the
4124 // entire point of the following loop.
4125 unsigned VecArgOffset = ArgOffset;
4126 if (!isVarArg && !isPPC64) {
4127 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
4128 ++ArgNo) {
4129 EVT ObjectVT = Ins[ArgNo].VT;
4130 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4131
4132 if (Flags.isByVal()) {
4133 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of regs.
4134 unsigned ObjSize = Flags.getByValSize();
4135 unsigned ArgSize =
4136 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4137 VecArgOffset += ArgSize;
4138 continue;
4139 }
4140
4141 switch(ObjectVT.getSimpleVT().SimpleTy) {
4142 default: llvm_unreachable("Unhandled argument type!");
4143 case MVT::i1:
4144 case MVT::i32:
4145 case MVT::f32:
4146 VecArgOffset += 4;
4147 break;
4148 case MVT::i64: // PPC64
4149 case MVT::f64:
4150 // FIXME: We are guaranteed to be !isPPC64 at this point.
4151 // Does MVT::i64 apply?
4152 VecArgOffset += 8;
4153 break;
4154 case MVT::v4f32:
4155 case MVT::v4i32:
4156 case MVT::v8i16:
4157 case MVT::v16i8:
4158 // Nothing to do, we're only looking at non-vector args here.
4159 break;
4160 }
4161 }
4162 }
4163 // We've found where the vector parameter area in memory is. Skip the
4164 // first 12 parameters; these don't use that memory.
4165 VecArgOffset = ((VecArgOffset+15)/16)*16;
4166 VecArgOffset += 12*16;
4167
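// A worked case for the rounding above (illustrative): with the 24-byte
// 32-bit Darwin linkage area and 20 bytes of non-vector arguments,
//   VecArgOffset = ((24 + 20 + 15) / 16) * 16 + 12 * 16 == 48 + 192 == 240
// i.e. the vector area starts 16-byte aligned, past the 12 * 16 bytes
// reserved for the first twelve vector parameters, which stay in VRs.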
4168 // Add DAG nodes to load the arguments or copy them out of registers. On
4169 // entry to a function on PPC, the arguments start after the linkage area,
4170 // although the first ones are often in registers.
4171
4172 SmallVector<SDValue, 8> MemOps;
4173 unsigned nAltivecParamsAtEnd = 0;
4174 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4175 unsigned CurArgIdx = 0;
4176 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4177 SDValue ArgVal;
4178 bool needsLoad = false;
4179 EVT ObjectVT = Ins[ArgNo].VT;
4180 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
4181 unsigned ArgSize = ObjSize;
4182 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4183 if (Ins[ArgNo].isOrigArg()) {
4184 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4185 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4186 }
4187 unsigned CurArgOffset = ArgOffset;
4188
4189 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
4190 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
4191 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
4192 if (isVarArg || isPPC64) {
4193 MinReservedArea = ((MinReservedArea+15)/16)*16;
4194 MinReservedArea += CalculateStackSlotSize(ObjectVT,
4195 Flags,
4196 PtrByteSize);
4197 } else nAltivecParamsAtEnd++;
4198 } else
4199 // Calculate min reserved area.
4200 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
4201 Flags,
4202 PtrByteSize);
4203
4204 // FIXME the codegen can be much improved in some cases.
4205 // We do not have to keep everything in memory.
4206 if (Flags.isByVal()) {
4207 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4208
4209 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
4210 ObjSize = Flags.getByValSize();
4211 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4212 // Objects of size 1 and 2 are right justified, everything else is
4213 // left justified. This means the memory address is adjusted forwards.
4214 if (ObjSize==1 || ObjSize==2) {
4215 CurArgOffset = CurArgOffset + (4 - ObjSize);
4216 }
4217 // The value of the object is its address.
4218 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
4219 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4220 InVals.push_back(FIN);
4221 if (ObjSize==1 || ObjSize==2) {
4222 if (GPR_idx != Num_GPR_Regs) {
4223 unsigned VReg;
4224 if (isPPC64)
4225 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4226 else
4227 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4228 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4229 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
4230 SDValue Store =
4231 DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
4232 MachinePointerInfo(&*FuncArg), ObjType);
4233 MemOps.push_back(Store);
4234 ++GPR_idx;
4235 }
4236
4237 ArgOffset += PtrByteSize;
4238
4239 continue;
4240 }
4241 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4242 // Store whatever pieces of the object are in registers
4243 // to memory. ArgOffset will be the address of the beginning
4244 // of the object.
4245 if (GPR_idx != Num_GPR_Regs) {
4246 unsigned VReg;
4247 if (isPPC64)
4248 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4249 else
4250 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4251 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4252 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4253 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4254 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4255 MachinePointerInfo(&*FuncArg, j));
4256 MemOps.push_back(Store);
4257 ++GPR_idx;
4258 ArgOffset += PtrByteSize;
4259 } else {
4260 ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
4261 break;
4262 }
4263 }
4264 continue;
4265 }
4266
4267 switch (ObjectVT.getSimpleVT().SimpleTy) {
4268 default: llvm_unreachable("Unhandled argument type!");
4269 case MVT::i1:
4270 case MVT::i32:
4271 if (!isPPC64) {
4272 if (GPR_idx != Num_GPR_Regs) {
4273 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4274 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4275
4276 if (ObjectVT == MVT::i1)
4277 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
4278
4279 ++GPR_idx;
4280 } else {
4281 needsLoad = true;
4282 ArgSize = PtrByteSize;
4283 }
4284 // All int arguments reserve stack space in the Darwin ABI.
4285 ArgOffset += PtrByteSize;
4286 break;
4287 }
4288 LLVM_FALLTHROUGH;
4289 case MVT::i64: // PPC64
4290 if (GPR_idx != Num_GPR_Regs) {
4291 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4292 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4293
4294 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4295 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4296 // value to MVT::i64 and then truncate to the correct register size.
4297 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4298
4299 ++GPR_idx;
4300 } else {
4301 needsLoad = true;
4302 ArgSize = PtrByteSize;
4303 }
4304 // All int arguments reserve stack space in the Darwin ABI.
4305 ArgOffset += 8;
4306 break;
4307
4308 case MVT::f32:
4309 case MVT::f64:
4310 // Every 4 bytes of argument space consumes one of the GPRs available for
4311 // argument passing.
4312 if (GPR_idx != Num_GPR_Regs) {
4313 ++GPR_idx;
4314 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
4315 ++GPR_idx;
4316 }
4317 if (FPR_idx != Num_FPR_Regs) {
4318 unsigned VReg;
4319
4320 if (ObjectVT == MVT::f32)
4321 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
4322 else
4323 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
4324
4325 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4326 ++FPR_idx;
4327 } else {
4328 needsLoad = true;
4329 }
4330
4331 // All FP arguments reserve stack space in the Darwin ABI.
4332 ArgOffset += isPPC64 ? 8 : ObjSize;
4333 break;
4334 case MVT::v4f32:
4335 case MVT::v4i32:
4336 case MVT::v8i16:
4337 case MVT::v16i8:
4338 // Note that vector arguments in registers don't reserve stack space,
4339 // except in varargs functions.
4340 if (VR_idx != Num_VR_Regs) {
4341 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4342 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4343 if (isVarArg) {
4344 while ((ArgOffset % 16) != 0) {
4345 ArgOffset += PtrByteSize;
4346 if (GPR_idx != Num_GPR_Regs)
4347 GPR_idx++;
4348 }
4349 ArgOffset += 16;
4350 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
4351 }
4352 ++VR_idx;
4353 } else {
4354 if (!isVarArg && !isPPC64) {
4355 // Vectors go after all the nonvectors.
4356 CurArgOffset = VecArgOffset;
4357 VecArgOffset += 16;
4358 } else {
4359 // Vectors are aligned.
4360 ArgOffset = ((ArgOffset+15)/16)*16;
4361 CurArgOffset = ArgOffset;
4362 ArgOffset += 16;
4363 }
4364 needsLoad = true;
4365 }
4366 break;
4367 }
4368
4369 // We need to load the argument to a virtual register if we determined above
4370 // that we ran out of physical registers of the appropriate type.
4371 if (needsLoad) {
4372 int FI = MFI.CreateFixedObject(ObjSize,
4373 CurArgOffset + (ArgSize - ObjSize),
4374 isImmutable);
4375 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4376 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4377 }
4378
4379 InVals.push_back(ArgVal);
4380 }
4381
4382 // Allow for Altivec parameters at the end, if needed.
4383 if (nAltivecParamsAtEnd) {
4384 MinReservedArea = ((MinReservedArea+15)/16)*16;
4385 MinReservedArea += 16*nAltivecParamsAtEnd;
4386 }
4387
4388 // Area that is at least reserved in the caller of this function.
4389 MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
4390
4391 // Set the size that is at least reserved in caller of this function. Tail
4392 // call optimized functions' reserved stack space needs to be aligned so that
4393 // taking the difference between two stack areas will result in an aligned
4394 // stack.
4395 MinReservedArea =
4396 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4397 FuncInfo->setMinReservedArea(MinReservedArea);
4398
4399 // If the function takes variable number of arguments, make a frame index for
4400 // the start of the first vararg value... for expansion of llvm.va_start.
4401 if (isVarArg) {
4402 int Depth = ArgOffset;
4403
4404 FuncInfo->setVarArgsFrameIndex(
4405 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
4406 Depth, true));
4407 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4408
4409 // If this function is vararg, store any remaining integer argument regs
4410 // to their spots on the stack so that they may be loaded by dereferencing
4411 // the result of va_next.
4412 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
4413 unsigned VReg;
4414
4415 if (isPPC64)
4416 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4417 else
4418 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4419
4420 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4421 SDValue Store =
4422 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4423 MemOps.push_back(Store);
4424 // Increment the address by four for the next argument to store
4425 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4426 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4427 }
4428 }
4429
4430 if (!MemOps.empty())
4431 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4432
4433 return Chain;
4434}
4435
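// The GPR-shadowing rule handled in the f32/f64 case above can be seen in
// a small example (illustrative, not from the source): for
//   void f(double d, int i);
// on 32-bit Darwin, d is passed in FPR1 yet still consumes r3 and r4, so i
// arrives in r5 -- which is why the code bumps GPR_idx once per 4 bytes of
// FP argument even when the value itself travels in an FPR.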
4436/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4437/// adjusted to accommodate the arguments for the tailcall.
4438static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4439 unsigned ParamSize) {
4440
4441 if (!isTailCall) return 0;
4442
4443 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4444 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4445 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4446 // Remember only if the new adjustment is bigger.
4447 if (SPDiff < FI->getTailCallSPDelta())
4448 FI->setTailCallSPDelta(SPDiff);
4449
4450 return SPDiff;
4451}
4452
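// A worked case for CalculateTailCallSPDiff above (illustrative): if the
// caller reserved 112 bytes but the tail-called function's arguments need
// 144, then SPDiff == 112 - 144 == -32, the stack must grow by 32 bytes
// before the jump, and the most negative delta seen so far is kept in
// TailCallSPDelta for frame lowering to honour.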
4453 static bool isFunctionGlobalAddress(SDValue Callee);
4454
4455static bool
4456 callsShareTOCBase(const Function *Caller, SDValue Callee,
4457 const TargetMachine &TM) {
4458 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4459 don't have enough information to determine if the caller and callee share
4460 // the same TOC base, so we have to pessimistically assume they don't for
4461 // correctness.
4462 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4463 if (!G)
4464 return false;
4465
4466 const GlobalValue *GV = G->getGlobal();
4467 // The medium and large code models are expected to provide a sufficiently
4468 // large TOC to provide all data addressing needs of a module with a
4469 // single TOC. Since each module will be addressed with a single TOC then we
4470 // only need to check that caller and callee don't cross dso boundaries.
4471 if (CodeModel::Medium == TM.getCodeModel() ||
4472 CodeModel::Large == TM.getCodeModel())
4473 return TM.shouldAssumeDSOLocal(*Caller->getParent(), GV);
4474
4475 // Otherwise we need to ensure callee and caller are in the same section,
4476 // since the linker may allocate multiple TOCs, and we don't know which
4477 // sections will belong to the same TOC base.
4478
4479 if (!GV->isStrongDefinitionForLinker())
4480 return false;
4481
4482 // Any explicitly-specified sections and section prefixes must also match.
4483 // Also, if we're using -ffunction-sections, then each function is always in
4484 // a different section (the same is true for COMDAT functions).
4485 if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4486 GV->getSection() != Caller->getSection())
4487 return false;
4488 if (const auto *F = dyn_cast<Function>(GV)) {
4489 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4490 return false;
4491 }
4492
4493 // If the callee might be interposed, then we can't assume the ultimate call
4494 // target will be in the same section. Even in cases where we can assume that
4495 // interposition won't happen, in any case where the linker might insert a
4496 // stub to allow for interposition, we must generate code as though
4497 // interposition might occur. To understand why this matters, consider a
4498 // situation where: a -> b -> c where the arrows indicate calls. b and c are
4499 // in the same section, but a is in a different module (i.e. has a different
4500 // TOC base pointer). If the linker allows for interposition between b and c,
4501 // then it will generate a stub for the call edge between b and c which will
4502 // save the TOC pointer into the designated stack slot allocated by b. If we
4503 // return true here, and therefore allow a tail call between b and c, that
4504 // stack slot won't exist and the b -> c stub will end up saving b's TOC base
4505 // pointer into the stack slot allocated by a (where the a -> b stub saved
4506 // a's TOC base pointer). If we're not considering a tail call, but rather,
4507 // whether a nop is needed after the call instruction in b, because the linker
4508 // will insert a stub, it might complain about a missing nop if we omit it
4509 // (although many don't complain in this case).
4510 if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4511 return false;
4512
4513 return true;
4514}
4515
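// The decision implemented by callsShareTOCBase above, condensed
// (paraphrase for reference, not from the source):
//   not a GlobalAddress                 -> false (unknown TOC)
//   medium/large code model             -> shouldAssumeDSOLocal(caller, GV)
//   not a strong definition for linker  -> false
//   -ffunction-sections / comdat /
//   differing section or section prefix -> false
//   possibly interposable               -> false
//   otherwise                           -> true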
4516static bool
4517 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4518 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4519 assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64());
4520
4521 const unsigned PtrByteSize = 8;
4522 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4523
4524 static const MCPhysReg GPR[] = {
4525 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4526 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4527 };
4528 static const MCPhysReg VR[] = {
4529 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4530 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4531 };
4532
4533 const unsigned NumGPRs = array_lengthof(GPR);
4534 const unsigned NumFPRs = 13;
4535 const unsigned NumVRs = array_lengthof(VR);
4536 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4537
4538 unsigned NumBytes = LinkageSize;
4539 unsigned AvailableFPRs = NumFPRs;
4540 unsigned AvailableVRs = NumVRs;
4541
4542 for (const ISD::OutputArg& Param : Outs) {
4543 if (Param.Flags.isNest()) continue;
4544
4545 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
4546 PtrByteSize, LinkageSize, ParamAreaSize,
4547 NumBytes, AvailableFPRs, AvailableVRs,
4548 Subtarget.hasQPX()))
4549 return true;
4550 }
4551 return false;
4552}
4553
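// Example of what trips needStackSlotPassParameters above (illustrative):
// a callee taking nine pointer arguments exhausts X3-X10, so the ninth is
// assigned a stack slot and the function returns true; eight or fewer
// pointer-sized arguments (with enough FPRs/VRs free) return false.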
4554static bool
4555 hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS) {
4556 if (CS.arg_size() != CallerFn->arg_size())
4557 return false;
4558
4559 ImmutableCallSite::arg_iterator CalleeArgIter = CS.arg_begin();
4560 ImmutableCallSite::arg_iterator CalleeArgEnd = CS.arg_end();
4561 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4562
4563 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4564 const Value* CalleeArg = *CalleeArgIter;
4565 const Value* CallerArg = &(*CallerArgIter);
4566 if (CalleeArg == CallerArg)
4567 continue;
4568
4569 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4570 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4571 // }
4572 // 1st argument of callee is undef and has the same type as caller.
4573 if (CalleeArg->getType() == CallerArg->getType() &&
4574 isa<UndefValue>(CalleeArg))
4575 continue;
4576
4577 return false;
4578 }
4579
4580 return true;
4581}
4582
4583 // Returns true if TCO is possible between the caller's and callee's
4584 // calling conventions.
4585static bool
4586 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4587 CallingConv::ID CalleeCC) {
4588 // Tail calls are possible with fastcc and ccc.
4589 auto isTailCallableCC = [] (CallingConv::ID CC){
4590 return CC == CallingConv::C || CC == CallingConv::Fast;
4591 };
4592 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4593 return false;
4594
4595 // We can safely tail call both fastcc and ccc callees from a c calling
4596 // convention caller. If the caller is fastcc, we may have less stack space
4597 // than a non-fastcc caller with the same signature so disable tail-calls in
4598 // that case.
4599 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4600}
4601
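// The compatibility rule above as a table (derived from the code, for
// reference only):
//
//   caller \ callee |  ccc  | fastcc
//   ----------------+-------+--------
//   ccc             |  yes  |  yes
//   fastcc          |  no   |  yes
//
// A fastcc caller may have reserved less stack than a ccc caller of the
// same signature, hence the asymmetry.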
4602bool
4603PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4604 SDValue Callee,
4605 CallingConv::ID CalleeCC,
4606 ImmutableCallSite CS,
4607 bool isVarArg,
4608 const SmallVectorImpl<ISD::OutputArg> &Outs,
4609 const SmallVectorImpl<ISD::InputArg> &Ins,
4610 SelectionDAG& DAG) const {
4611 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4612
4613 if (DisableSCO && !TailCallOpt) return false;
4614
4615 // Variadic argument functions are not supported.
4616 if (isVarArg) return false;
4617
4618 auto &Caller = DAG.getMachineFunction().getFunction();
4619 // Check that the calling conventions are compatible for tco.
4620 if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4621 return false;
4622
4623 // A caller that contains any byval parameter is not supported.
4624 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4625 return false;
4626
4627 // A callee that contains any byval parameter is not supported either.
4628 // Note: This is a quick work around, because in some cases, e.g.
4629 // caller's stack size > callee's stack size, we are still able to apply
4630 // sibling call optimization. For example, gcc is able to do SCO for caller1
4631 // in the following example, but not for caller2.
4632 // struct test {
4633 // long int a;
4634 // char ary[56];
4635 // } gTest;
4636 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4637 // b->a = v.a;
4638 // return 0;
4639 // }
4640 // void caller1(struct test a, struct test c, struct test *b) {
4641 // callee(gTest, b); }
4642 // void caller2(struct test *b) { callee(gTest, b); }
4643 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4644 return false;
4645
4646 // If callee and caller use different calling conventions, we cannot pass
4647 // parameters on stack since offsets for the parameter area may be different.
4648 if (Caller.getCallingConv() != CalleeCC &&
4649 needStackSlotPassParameters(Subtarget, Outs))
4650 return false;
4651
4652 // No TCO/SCO on indirect calls, because the caller has to restore its TOC.
4653 if (!isFunctionGlobalAddress(Callee) &&
4654 !isa<ExternalSymbolSDNode>(Callee))
4655 return false;
4656
4657 // If the caller and callee potentially have different TOC bases then we
4658 // cannot tail call since we need to restore the TOC pointer after the call.
4659 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4660 if (!callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4661 return false;
4662
4663 // TCO allows altering callee ABI, so we don't have to check further.
4664 if (CalleeCC == CallingConv::Fast && TailCallOpt)
4665 return true;
4666
4667 if (DisableSCO) return false;
4668
4669 // If the callee uses the same argument list as the caller, we can apply
4670 // SCO in this case. If not, we need to check whether the callee needs
4671 // stack slots for passing arguments.
4672 if (!hasSameArgumentList(&Caller, CS) &&
4673 needStackSlotPassParameters(Subtarget, Outs)) {
4674 return false;
4675 }
4676
4677 return true;
4678}
4679
4680/// IsEligibleForTailCallOptimization - Check whether the call is eligible
4681/// for tail call optimization. Targets which want to do tail call
4682/// optimization should implement this function.
4683bool
4684PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4685 CallingConv::ID CalleeCC,
4686 bool isVarArg,
4687 const SmallVectorImpl<ISD::InputArg> &Ins,
4688 SelectionDAG& DAG) const {
4689 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4690 return false;
4691
4692 // Variable argument functions are not supported.
4693 if (isVarArg)
4694 return false;
4695
4696 MachineFunction &MF = DAG.getMachineFunction();
4697 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4698 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4699 // Functions containing by val parameters are not supported.
4700 for (unsigned i = 0; i != Ins.size(); i++) {
4701 ISD::ArgFlagsTy Flags = Ins[i].Flags;
4702 if (Flags.isByVal()) return false;
4703 }
4704
4705 // Non-PIC/GOT tail calls are supported.
4706 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4707 return true;
4708
4709 // At the moment we can only do local tail calls (in same module, hidden
4710 // or protected) if we are generating PIC.
4711 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4712 return G->getGlobal()->hasHiddenVisibility()
4713 || G->getGlobal()->hasProtectedVisibility();
4714 }
4715
4716 return false;
4717}
4718
4719 /// isBLACompatibleAddress - Return the immediate to use if the specified
4720/// 32-bit value is representable in the immediate field of a BxA instruction.
4721 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4722 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4723 if (!C) return nullptr;
4724
4725 int Addr = C->getZExtValue();
4726 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
4727 SignExtend32<26>(Addr) != Addr)
4728 return nullptr; // Top 6 bits have to be sext of immediate.
4729
4730 return DAG
4731 .getConstant(
4732 (int)C->getZExtValue() >> 2, SDLoc(Op),
4733 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4734 .getNode();
4735}
4736
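// The encoding test above in concrete terms (illustrative values): an
// I-form branch absolute takes a 26-bit sign-extended, 4-byte-aligned
// target, so
//   0x00000ffc -> accepted, emitted as 0xffc >> 2 == 0x3ff
//   0x00000ffe -> rejected (low two bits not clear)
//   0x04000000 -> rejected (does not survive SignExtend32<26>)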
4737namespace {
4738
4739struct TailCallArgumentInfo {
4740 SDValue Arg;
4741 SDValue FrameIdxOp;
4742 int FrameIdx = 0;
4743
4744 TailCallArgumentInfo() = default;
4745};
4746
4747} // end anonymous namespace
4748
4749/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4750 static void StoreTailCallArgumentsToStackSlot(
4751 SelectionDAG &DAG, SDValue Chain,
4752 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4753 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4754 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4755 SDValue Arg = TailCallArgs[i].Arg;
4756 SDValue FIN = TailCallArgs[i].FrameIdxOp;
4757 int FI = TailCallArgs[i].FrameIdx;
4758 // Store relative to framepointer.
4759 MemOpChains.push_back(DAG.getStore(
4760 Chain, dl, Arg, FIN,
4761 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4762 }
4763}
4764
4765/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4766/// the appropriate stack slot for the tail call optimized function call.
4767 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4768 SDValue OldRetAddr, SDValue OldFP,
4769 int SPDiff, const SDLoc &dl) {
4770 if (SPDiff) {
4771 // Calculate the new stack slot for the return address.
4772 MachineFunction &MF = DAG.getMachineFunction();
4773 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4774 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4775 bool isPPC64 = Subtarget.isPPC64();
4776 int SlotSize = isPPC64 ? 8 : 4;
4777 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4778 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4779 NewRetAddrLoc, true);
4780 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4781 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4782 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4783 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4784
4785 // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
4786 // slot as the FP is never overwritten.
4787 if (Subtarget.isDarwinABI()) {
4788 int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
4789 int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc,
4790 true);
4791 SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
4792 Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
4793 MachinePointerInfo::getFixedStack(
4794 DAG.getMachineFunction(), NewFPIdx));
4795 }
4796 }
4797 return Chain;
4798}
4799
4800/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
4801/// the position of the argument.
4802static void
4803 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
4804 SDValue Arg, int SPDiff, unsigned ArgOffset,
4805 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
4806 int Offset = ArgOffset + SPDiff;
4807 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
4808 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4809 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4810 SDValue FIN = DAG.getFrameIndex(FI, VT);
4811 TailCallArgumentInfo Info;
4812 Info.Arg = Arg;
4813 Info.FrameIdxOp = FIN;
4814 Info.FrameIdx = FI;
4815 TailCallArguments.push_back(Info);
4816}
4817
4818/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
4819/// stack slot. Returns the chain as result and the loaded frame pointers in
4820/// LROpOut/FPOpout. Used when tail calling.
4821SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
4822 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
4823 SDValue &FPOpOut, const SDLoc &dl) const {
4824 if (SPDiff) {
4825 // Load the LR and FP stack slot for later adjusting.
4826 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
4827 LROpOut = getReturnAddrFrameIndex(DAG);
4828 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
4829 Chain = SDValue(LROpOut.getNode(), 1);
4830
4831 // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
4832 // slot as the FP is never overwritten.
4833 if (Subtarget.isDarwinABI()) {
4834 FPOpOut = getFramePointerFrameIndex(DAG);
4835 FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo());
4836 Chain = SDValue(FPOpOut.getNode(), 1);
4837 }
4838 }
4839 return Chain;
4840}
4841
4842/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
4843/// by "Src" to address "Dst" of size "Size". Alignment information is
4844/// specified by the specific parameter attribute. The copy will be passed as
4845/// a byval function parameter.
4846/// Sometimes what we are copying is the end of a larger object, the part that
4847/// does not fit in registers.
4848 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
4849 SDValue Chain, ISD::ArgFlagsTy Flags,
4850 SelectionDAG &DAG, const SDLoc &dl) {
4851 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
4852 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
4853 false, false, false, MachinePointerInfo(),
4855}
4856
4857/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4858/// tail calls.
4859 static void LowerMemOpCallTo(
4860 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
4861 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
4862 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
4863 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
4864 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4865 if (!isTailCall) {
4866 if (isVector) {
4867 SDValue StackPtr;
4868 if (isPPC64)
4869 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4870 else
4871 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4872 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4873 DAG.getConstant(ArgOffset, dl, PtrVT));
4874 }
4875 MemOpChains.push_back(
4876 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
4877 // Calculate and remember argument location.
4878 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
4879 TailCallArguments);
4880}
4881
4882static void
4883 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
4884 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
4885 SDValue FPOp,
4886 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
4887 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
4888 // might overwrite each other in case of tail call optimization.
4889 SmallVector<SDValue, 8> MemOpChains2;
4890 // Do not flag preceding copytoreg stuff together with the following stuff.
4891 InFlag = SDValue();
4892 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
4893 MemOpChains2, dl);
4894 if (!MemOpChains2.empty())
4895 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4896
4897 // Store the return address to the appropriate stack slot.
4898 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
4899
4900 // Emit callseq_end just before tailcall node.
4901 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4902 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4903 InFlag = Chain.getValue(1);
4904}
4905
4906// Is this global address that of a function that can be called by name? (as
4907// opposed to something that must hold a descriptor for an indirect call).
4908 static bool isFunctionGlobalAddress(SDValue Callee) {
4909 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4910 if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
4911 Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
4912 return false;
4913
4914 return G->getGlobal()->getValueType()->isFunctionTy();
4915 }
4916
4917 return false;
4918}
4919
4920static unsigned
4921 PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
4922 SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
4923 bool isPatchPoint, bool hasNest,
4924 SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
4925 SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
4926 ImmutableCallSite CS, const PPCSubtarget &Subtarget) {
4927 bool isPPC64 = Subtarget.isPPC64();
4928 bool isSVR4ABI = Subtarget.isSVR4ABI();
4929 bool isELFv2ABI = Subtarget.isELFv2ABI();
4930 bool isAIXABI = Subtarget.isAIXABI();
4931
4932 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4933 NodeTys.push_back(MVT::Other); // Returns a chain
4934 NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
4935
4936 unsigned CallOpc = PPCISD::CALL;
4937
4938 bool needIndirectCall = true;
4939 if (!isSVR4ABI || !isPPC64)
4940 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
4941 // If this is an absolute destination address, use the munged value.
4942 Callee = SDValue(Dest, 0);
4943 needIndirectCall = false;
4944 }
4945
4946 // PC-relative references to external symbols should go through $stub, unless
4947 // we're building with the leopard linker or later, which automatically
4948 // synthesizes these stubs.
4949 const TargetMachine &TM = DAG.getTarget();
4950 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
4951 const GlobalValue *GV = nullptr;
4952 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
4953 GV = G->getGlobal();
4954 bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
4955 bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;
4956
4957 // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
4958 // every direct call is) turn it into a TargetGlobalAddress /
4959 // TargetExternalSymbol node so that legalize doesn't hack it.
4960 if (isFunctionGlobalAddress(Callee)) {
4961 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
4962
4963 // A call to a TLS address is actually an indirect call to a
4964 // thread-specific pointer.
4965 unsigned OpFlags = 0;
4966 if (UsePlt)
4967 OpFlags = PPCII::MO_PLT;
4968
4969 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
4970 Callee.getValueType(), 0, OpFlags);
4971 needIndirectCall = false;
4972 }
4973
4974 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4975 unsigned char OpFlags = 0;
4976
4977 if (UsePlt)
4978 OpFlags = PPCII::MO_PLT;
4979
4980 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
4981 OpFlags);
4982 needIndirectCall = false;
4983 }
4984
4985 if (isPatchPoint) {
4986 // We'll form an invalid direct call when lowering a patchpoint; the full
4987 // sequence for an indirect call is complicated, and many of the
4988 // instructions introduced might have side effects (and, thus, can't be
4989 // removed later). The call itself will be removed as soon as the
4990 // argument/return lowering is complete, so the fact that it has the wrong
4991 // kind of operands should not really matter.
4992 needIndirectCall = false;
4993 }
4994
4995 if (needIndirectCall) {
4996 // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
4997 // to do the call, we can't use PPCISD::CALL.
4998 SDValue MTCTROps[] = {Chain, Callee, InFlag};
4999
5000 if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
5001 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5002 // entry point, but to the function descriptor (the function entry point
5003 // address is part of the function descriptor though).
5004 // The function descriptor is a three doubleword structure with the
5005 // following fields: function entry point, TOC base address and
5006 // environment pointer.
5007 // Thus for a call through a function pointer, the following actions need
5008 // to be performed:
5009 // 1. Save the TOC of the caller in the TOC save area of its stack
5010 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5011 // 2. Load the address of the function entry point from the function
5012 // descriptor.
5013 // 3. Load the TOC of the callee from the function descriptor into r2.
5014 // 4. Load the environment pointer from the function descriptor into
5015 // r11.
5016 // 5. Branch to the function entry point address.
5017 // 6. On return of the callee, the TOC of the caller needs to be
5018 // restored (this is done in FinishCall()).
5019 //
5020 // The loads are scheduled at the beginning of the call sequence, and the
5021 // register copies are flagged together to ensure that no other
5022 // operations can be scheduled in between. E.g. without flagging the
5023 // copies together, a TOC access in the caller could be scheduled between
5024 // the assignment of the callee TOC and the branch to the callee, which
5025 // results in the TOC access going through the TOC of the callee instead
5026 // of going through the TOC of the caller, which leads to incorrect code.
5027
5028 // Load the address of the function entry point from the function
5029 // descriptor.
5030 SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
5031 if (LDChain.getValueType() == MVT::Glue)
5032 LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
5033
5034 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5035 ? (MachineMemOperand::MODereferenceable |
5036 MachineMemOperand::MOInvariant)
5037 : MachineMemOperand::MONone;
5038
5039 MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr);
5040 SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
5041 /* Alignment = */ 8, MMOFlags);
5042
5043 // Load environment pointer into r11.
5044 SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
5045 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
5046 SDValue LoadEnvPtr =
5047 DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
5048 /* Alignment = */ 8, MMOFlags);
5049
5050 SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
5051 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
5052 SDValue TOCPtr =
5053 DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
5054 /* Alignment = */ 8, MMOFlags);
5055
5056 setUsesTOCBasePtr(DAG);
5057 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
5058 InFlag);
5059 Chain = TOCVal.getValue(0);
5060 InFlag = TOCVal.getValue(1);
5061
5062 // If the function call has an explicit 'nest' parameter, it takes the
5063 // place of the environment pointer.
5064 if (!hasNest) {
5065 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
5066 InFlag);
5067
5068 Chain = EnvVal.getValue(0);
5069 InFlag = EnvVal.getValue(1);
5070 }
5071
5072 MTCTROps[0] = Chain;
5073 MTCTROps[1] = LoadFuncPtr;
5074 MTCTROps[2] = InFlag;
5075 }
5076
5077 Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
5078 makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
5079 InFlag = Chain.getValue(1);
5080
5081 NodeTys.clear();
5082 NodeTys.push_back(MVT::Other);
5083 NodeTys.push_back(MVT::Glue);
5084 Ops.push_back(Chain);
5085 CallOpc = PPCISD::BCTRL;
5086 Callee.setNode(nullptr);
5087 // Add use of X11 (holding environment pointer)
5088 if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
5089 Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
5090 // Add CTR register as callee so a bctr can be emitted later.
5091 if (isTailCall)
5092 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
5093 }
5094
5095 // If this is a direct call, pass the chain and the callee.
5096 if (Callee.getNode()) {
5097 Ops.push_back(Chain);
5098 Ops.push_back(Callee);
5099 }
5100 // If this is a tail call add stack pointer delta.
5101 if (isTailCall)
5102 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5103
5104 // Add argument registers to the end of the list so that they are known live
5105 // into the call.
5106 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5107 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5108 RegsToPass[i].second.getValueType()));
5109
5110 // All calls, in the AIX ABI and 64-bit ELF ABIs, need the TOC register
5111 // live into the call.
5112 // We do need to reserve R2/X2 to appease the verifier for the PATCHPOINT.
5113 if ((isSVR4ABI && isPPC64) || isAIXABI) {
5114 setUsesTOCBasePtr(DAG);
5115
5116 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5117 // no way to mark dependencies as implicit here.
5118 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5119 if (!isPatchPoint)
5120 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::X2
5121 : PPC::R2, PtrVT));
5122 }
5123
5124 return CallOpc;
5125}
5126
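// The three loads in PrepareCall's ELFv1 indirect-call path correspond to
// this descriptor layout (sketch; the struct is not declared in this file):
//
//   struct FunctionDescriptor {   // 64-bit ELFv1
//     uint64_t EntryPoint;        // offset 0,  moved to CTR
//     uint64_t TOCBase;           // offset 8,  moved to r2
//     uint64_t EnvironmentPtr;    // offset 16, moved to r11 (unless 'nest')
//   };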
5127SDValue PPCTargetLowering::LowerCallResult(
5128 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5129 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5130 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5131 SmallVector<CCValAssign, 16> RVLocs;
5132 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5133 *DAG.getContext());
5134
5135 CCRetInfo.AnalyzeCallResult(
5136 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5137 ? RetCC_PPC_Cold
5138 : RetCC_PPC);
5139
5140 // Copy all of the result registers out of their specified physreg.
5141 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5142 CCValAssign &VA = RVLocs[i];
5143 assert(VA.isRegLoc() && "Can only return in registers!");
5144
5145 SDValue Val;
5146
5147 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5148 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5149 InFlag);
5150 Chain = Lo.getValue(1);
5151 InFlag = Lo.getValue(2);
5152 VA = RVLocs[++i]; // skip ahead to next loc
5153 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5154 InFlag);
5155 Chain = Hi.getValue(1);
5156 InFlag = Hi.getValue(2);
5157 if (!Subtarget.isLittleEndian())
5158 std::swap (Lo, Hi);
5159 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5160 } else {
5161 Val = DAG.getCopyFromReg(Chain, dl,
5162 VA.getLocReg(), VA.getLocVT(), InFlag);
5163 Chain = Val.getValue(1);
5164 InFlag = Val.getValue(2);
5165 }
5166
5167 switch (VA.getLocInfo()) {
5168 default: llvm_unreachable("Unknown loc info!");
5169 case CCValAssign::Full: break;
5170 case CCValAssign::AExt:
5171 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5172 break;
5173 case CCValAssign::ZExt:
5174 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5175 DAG.getValueType(VA.getValVT()));
5176 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5177 break;
5178 case CCValAssign::SExt:
5179 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5180 DAG.getValueType(VA.getValVT()));
5181 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5182 break;
5183 }
5184
5185 InVals.push_back(Val);
5186 }
5187
5188 return Chain;
5189}
5190
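// The SPE f64 reassembly in LowerCallResult above is, conceptually
// (illustrative C++, not part of the lowering):
//
//   uint64_t Bits = ((uint64_t)Hi << 32) | Lo;  // after any endian swap
//   double D;
//   memcpy(&D, &Bits, sizeof D);                // BUILD_SPE64 equivalent
//
// with Lo and Hi swapped first on little-endian subtargets, as the code
// does before building the PPCISD::BUILD_SPE64 node.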
5191SDValue PPCTargetLowering::FinishCall(
5192 CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
5193 bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
5194 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
5195 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5196 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5197 SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const {
5198 std::vector<EVT> NodeTys;
5199 SmallVector<SDValue, 8> Ops;
5200 unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
5201 SPDiff, isTailCall, isPatchPoint, hasNest,
5202 RegsToPass, Ops, NodeTys, CS, Subtarget);
5203
5204 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5205 if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
5206 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5207
5208 // When performing tail call optimization the callee pops its arguments off
5209 // the stack. Account for this here so these bytes can be pushed back on in
5210 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5211 int BytesCalleePops =
5212 (CallConv == CallingConv::Fast &&
5213 getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
5214
5215 // Add a register mask operand representing the call-preserved registers.
5216 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5217 const uint32_t *Mask =
5218 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
5219 assert(Mask && "Missing call preserved mask for calling convention");
5220 Ops.push_back(DAG.getRegisterMask(Mask));
5221
5222 if (InFlag.getNode())
5223 Ops.push_back(InFlag);
5224
5225 // Emit tail call.
5226 if (isTailCall) {
5227 assert(((Callee.getOpcode() == ISD::Register &&
5228 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5229 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5230 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5231 isa<ConstantSDNode>(Callee)) &&
5232 "Expecting an global address, external symbol, absolute value or register");
5233
5235 return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
5236 }
5237
5238 // Add a NOP immediately after the branch instruction when using the 64-bit
5239 // SVR4 or the AIX ABI.
5240 // At link time, if caller and callee are in a different module and
5241 // thus have a different TOC, the call will be replaced with a call to a stub
5242 // function which saves the current TOC, loads the TOC of the callee and
5243 // branches to the callee. The NOP will be replaced with a load instruction
5244 // which restores the TOC of the caller from the TOC save slot of the current
5245 // stack frame. If caller and callee belong to the same module (and have the
5246 // same TOC), the NOP will remain unchanged, or become some other NOP.
5247
5248 MachineFunction &MF = DAG.getMachineFunction();
5249 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5250 if (!isTailCall && !isPatchPoint &&
5251 ((Subtarget.isSVR4ABI() && Subtarget.isPPC64()) ||
5252 Subtarget.isAIXABI())) {
5253 if (CallOpc == PPCISD::BCTRL) {
5254 if (Subtarget.isAIXABI())
5255 report_fatal_error("Indirect call on AIX is not implemented.");
5256
5257 // This is a call through a function pointer.
5258 // Restore the caller TOC from the save area into R2.
5259 // See PrepareCall() for more information about calls through function
5260 // pointers in the 64-bit SVR4 ABI.
5261 // We are using a target-specific load with r2 hard coded, because the
5262 // result of a target-independent load would never go directly into r2,
5263 // since r2 is a reserved register (which prevents the register allocator
5264 // from allocating it), resulting in an additional register being
5265 // allocated and an unnecessary move instruction being generated.
5266 CallOpc = PPCISD::BCTRL_LOAD_TOC;
5267
5268 SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
5269 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5270 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5271 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
5272
5273 // The address needs to go after the chain input but before the flag (or
5274 // any other variadic arguments).
5275 Ops.insert(std::next(Ops.begin()), AddTOC);
5276 } else if (CallOpc == PPCISD::CALL &&
5277 !callsShareTOCBase(&MF.getFunction(), Callee, DAG.getTarget())) {
5278 // Otherwise insert NOP for non-local calls.
5279 CallOpc = PPCISD::CALL_NOP;
5280 }
5281 }
5282
5283 if (Subtarget.isAIXABI() && isFunctionGlobalAddress(Callee)) {
5284 // On AIX, direct function calls reference the symbol for the function's
5285 // entry point, which is named by inserting a "." before the function's
5286 // C-linkage name.
5287 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
5288 auto &Context = DAG.getMachineFunction().getMMI().getContext();
5289 MCSymbol *S = Context.getOrCreateSymbol(Twine(".") +
5290 Twine(G->getGlobal()->getName()));
5291 Callee = DAG.getMCSymbol(S, PtrVT);
5292 // Replace the GlobalAddressSDNode Callee with the MCSymbolSDNode.
5293 Ops[1] = Callee;
5294 }
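// For example, a direct call to a function whose C-linkage name is "foo" is
// emitted against the entry-point symbol ".foo"; the undotted "foo" names
// the function descriptor in the XCOFF object.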
5295
5296 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
5297 InFlag = Chain.getValue(1);
5298
5299 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5300 DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5301 InFlag, dl);
5302 if (!Ins.empty())
5303 InFlag = Chain.getValue(1);
5304
5305 return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
5306 Ins, dl, DAG, InVals);
5307}
5308
5309SDValue
5310PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5311 SmallVectorImpl<SDValue> &InVals) const {
5312 SelectionDAG &DAG = CLI.DAG;
5313 SDLoc &dl = CLI.DL;
5314 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5315 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5316 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5317 SDValue Chain = CLI.Chain;
5318 SDValue Callee = CLI.Callee;
5319 bool &isTailCall = CLI.IsTailCall;
5320 CallingConv::ID CallConv = CLI.CallConv;
5321 bool isVarArg = CLI.IsVarArg;
5322 bool isPatchPoint = CLI.IsPatchPoint;
5323 ImmutableCallSite CS = CLI.CS;
5324
5325 if (isTailCall) {
5326 if (Subtarget.useLongCalls() && !(CS && CS.isMustTailCall()))
5327 isTailCall = false;
5328 else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5329 isTailCall =
5330 IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS,
5331 isVarArg, Outs, Ins, DAG);
5332 else
5333 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5334 Ins, DAG);
5335 if (isTailCall) {
5336 ++NumTailCalls;
5337 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5338 ++NumSiblingCalls;
5339
5340 assert(isa<GlobalAddressSDNode>(Callee) &&
5341 "Callee should be an llvm::Function object.");
5342 LLVM_DEBUG(
5343 const GlobalValue *GV =
5344 cast<GlobalAddressSDNode>(Callee)->getGlobal();
5345 const unsigned Width =
5346 80 - strlen("TCO caller: ") - strlen(", callee linkage: 0, 0");
5347 dbgs() << "TCO caller: "
5348 << left_justify(DAG.getMachineFunction().getName(), Width)
5349 << ", callee linkage: " << GV->getVisibility() << ", "
5350 << GV->getLinkage() << "\n");
5351 }
5352 }
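// On the statistics above: every transformed call counts toward NumTailCalls,
// and those taken without GuaranteedTailCallOpt also count as sibling calls,
// i.e. the plain "return bar(x);" pattern where the caller's frame is reused.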
5353
5354 if (!isTailCall && CS && CS.isMustTailCall())
5355 report_fatal_error("failed to perform tail call elimination on a call "
5356 "site marked musttail");
5357
5358 // When long calls (i.e. indirect calls) are always used, calls are always
5359 // made via function pointer. If we have a function name, first translate it
5360 // into a pointer.
5361 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5362 !isTailCall)
5363 Callee = LowerGlobalAddress(Callee, DAG);
5364
5365 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5366 return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
5367 isTailCall, isPatchPoint, Outs, OutVals, Ins,
5368 dl, DAG, InVals, CS);
5369
5370 if (Subtarget.isSVR4ABI())
5371 return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
5372 isTailCall, isPatchPoint, Outs, OutVals, Ins,
5373 dl, DAG, InVals, CS);
5374
5375 if (Subtarget.isAIXABI())
5376 return LowerCall_AIX(Chain, Callee, CallConv, isVarArg,
5377 isTailCall, isPatchPoint, Outs, OutVals, Ins,
5378 dl, DAG, InVals, CS);
5379
5380 return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
5381 isTailCall, isPatchPoint, Outs, OutVals, Ins,
5382 dl, DAG, InVals, CS);
5383}
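// A minimal IR-level driver for this dispatch might look like the following
// (hypothetical module; the powerpc64le triple routes it through
// LowerCall_64SVR4):
//
//   target triple = "powerpc64le-unknown-linux-gnu"
//   declare i64 @callee(i64)
//   define i64 @caller(i64 %x) {
//     %r = tail call i64 @callee(i64 %x)
//     ret i64 %r
//   }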
5384
5385SDValue PPCTargetLowering::LowerCall_32SVR4(
5386 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5387 bool isTailCall, bool isPatchPoint,
5388 const SmallVectorImpl<ISD::OutputArg> &Outs,
5389 const SmallVectorImpl<SDValue> &OutVals,
5390 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5391 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5392 ImmutableCallSite CS) const {
5393 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5394 // of the 32-bit SVR4 ABI stack frame layout.
5395
5396 assert((CallConv == CallingConv::C ||
5397 CallConv == CallingConv::Cold ||
5398 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5399
5400 unsigned PtrByteSize = 4;
5401
5402 MachineFunction &MF = DAG.getMachineFunction();
5403
5404 // Mark this function as potentially containing a call that is tail-call
5405 // optimized. As a consequence the frame pointer will be used for dynamic
5406 // stack allocation and for restoring the caller's stack pointer in this
5407 // function's epilogue: by tail calling, the called function might overwrite
5408 // the value in this function's (MF) stack pointer stack slot 0(SP).
5409 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5410 CallConv == CallingConv::Fast)
5411 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5412
5413 // Count how many bytes are to be pushed on the stack, including the linkage
5414 // area, parameter list area and the part of the local variable space which
5415 // contains copies of aggregates which are passed by value.
5416
5417 // Assign locations to all of the outgoing arguments.
5418 SmallVector<CCValAssign, 16> ArgLocs;
5419 PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
5420
5421 // Reserve space for the linkage area on the stack.
5422 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5423 PtrByteSize);
5424 if (useSoftFloat())
5425 CCInfo.PreAnalyzeCallOperands(Outs);
5426
5427 if (isVarArg) {
5428 // Handle fixed and variable vector arguments differently.
5429 // Fixed vector arguments go into registers as long as registers are
5430 // available. Variable vector arguments always go into memory.
5431 unsigned NumArgs = Outs.size();
5432
5433 for (unsigned i = 0; i != NumArgs; ++i) {
5434 MVT ArgVT = Outs[i].VT;
5435 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5436 bool Result;
5437
5438 if (Outs[i].IsFixed) {
5439 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5440 CCInfo);
5441 } else {
5442 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5443 ArgFlags, CCInfo);
5444 }
5445
5446 if (Result) {
5447#ifndef NDEBUG
5448 errs() << "Call operand #" << i << " has unhandled type "
5449 << EVT(ArgVT).getEVTString() << "\n";
5450#endif
5451 llvm_unreachable(nullptr);
5452 }
5453 }
5454 } else {
5455 // All arguments are treated the same.
5456 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5457 }
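// Example: for a callee "void f(vector int fixed, ...)" invoked with a second
// vector as the variadic operand, the fixed vector goes through CC_PPC32_SVR4
// and may be assigned a VR, while the variadic one goes through
// CC_PPC32_SVR4_VarArg and is always assigned a memory slot.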
5458 CCInfo.clearWasPPCF128();
5459
5460 // Assign locations to all of the outgoing aggregate by value arguments.
5461 SmallVector<CCValAssign, 16> ByValArgLocs;
5462 CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext());
5463
5464 // Reserve stack space for the allocations in CCInfo.
5465 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
5466
5467 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5468
5469 // Size of the linkage area, parameter list area and the part of the local
5470 // variable space where copies of aggregates which are passed by value are
5471 // stored.
5472 unsigned NumBytes = CCByValInfo.getNextStackOffset();
5473
5474 // Calculate by how many bytes the stack has to be adjusted in case of tail
5475 // call optimization.
5476 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5477
5478 // Adjust the stack pointer for the new arguments...
5479 // These operations are automatically eliminated by the prolog/epilog pass
5480 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5481 SDValue CallSeqStart = Chain;
5482
5483 // Load the return address and frame pointer so they can be moved somewhere
5484 // else later.
5485 SDValue LROp, FPOp;
5486 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5487
5488 // Set up a copy of the stack pointer for use loading and storing any
5489 // arguments that may not fit in the registers available for argument
5490 // passing.
5491 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5492
5493 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5494 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5495 SmallVector<SDValue, 8> MemOpChains;
5496
5497 bool seenFloatArg = false;
5498 // Walk the register/memloc assignments, inserting copies/loads.
5499 // i - Tracks the index into the list of registers allocated for the call
5500 // RealArgIdx - Tracks the index into the list of actual function arguments
5501 // j - Tracks the index into the list of byval arguments
5502 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5503 i != e;
5504 ++i, ++RealArgIdx) {
5505 CCValAssign &VA = ArgLocs[i];
5506 SDValue Arg = OutVals[RealArgIdx];
5507 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5508
5509 if (Flags.isByVal()) {
5510 // Argument is an aggregate which is passed by value, thus we need to
5511 // create a copy of it in the local variable space of the current stack
5512 // frame (which is the stack frame of the caller) and pass the address of
5513 // this copy to the callee.
5514 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5515 CCValAssign &ByValVA = ByValArgLocs[j++];
5516 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5517
5518 // Memory reserved in the local variable space of the caller's stack frame.
5519 unsigned LocMemOffset = ByValVA.getLocMemOffset();
5520
5521 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5522 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5523 StackPtr, PtrOff);
5524
5525 // Create a copy of the argument in the local area of the current
5526 // stack frame.
5527 SDValue MemcpyCall =
5528 CreateCopyOfByValArgument(Arg, PtrOff,
5529 CallSeqStart.getNode()->getOperand(0),
5530 Flags, DAG, dl);
5531
5532 // This must go outside the CALLSEQ_START..END.
5533 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5534 SDLoc(MemcpyCall));
5535 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5536 NewCallSeqStart.getNode());
5537 Chain = CallSeqStart = NewCallSeqStart;
5538
5539 // Pass the address of the aggregate copy on the stack either in a
5540 // physical register or in the parameter list area of the current stack
5541 // frame to the callee.
5542 Arg = PtrOff;
5543 }
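// E.g. for "struct S { int a[4]; } s" passed by value, the 16-byte aggregate
// is memcpy'd into the caller's local area and Arg is rewritten to the copy's
// address, so the callee may modify its copy without touching s.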
5544
5545 // When useCRBits() is true, there can be i1 arguments.
5546 // This is because getRegisterType(MVT::i1) => MVT::i1,
5547 // while for other integer types getRegisterType() => MVT::i32.
5548 // Extend i1 here so the callee is guaranteed to receive an i32.
5549 if (Arg.getValueType() == MVT::i1)
5550 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5551 dl, MVT::i32, Arg);
5552
5553 if (VA.isRegLoc()) {
5554 seenFloatArg |= VA.getLocVT().isFloatingPoint();
5555 // Put argument in a physical register.
5556 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5557 bool IsLE = Subtarget.isLittleEndian();
5558 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5559 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5560 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5561 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5562 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5563 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5564 SVal.getValue(0)));
5565 } else
5566 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5567 } else {
5568 // Put argument in the parameter list area of the current stack frame.
5569 assert(VA.isMemLoc());
5570 unsigned LocMemOffset = VA.getLocMemOffset();
5571
5572 if (!isTailCall) {
5573 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5574 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5575 StackPtr, PtrOff);
5576
5577 MemOpChains.push_back(
5578 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5579 } else {
5580 // Calculate and remember argument location.
5581 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5582 TailCallArguments);
5583 }
5584 }
5585 }
5586
5587 if (!MemOpChains.empty())
5588 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5589
5590 // Build a sequence of copy-to-reg nodes chained together with token chain
5591 // and flag operands which copy the outgoing args into the appropriate regs.
5592 SDValue InFlag;
5593 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5594 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5595 RegsToPass[i].second, InFlag);
5596 InFlag = Chain.getValue(1);
5597 }
5598
5599 // Set CR bit 6 to true if this is a vararg call with floating args passed in
5600 // registers.
5601 if (isVarArg) {
5602 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5603 SDValue Ops[] = { Chain, InFlag };
5604
5605 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5606 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5607
5608 InFlag = Chain.getValue(1);
5609 }
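// CR6SET lowers to "creqv 6, 6, 6" and CR6UNSET to "crxor 6, 6, 6". For
// example, printf("%f", x) sets CR bit 6 because x travels in an FPR, while
// printf("%d", n) clears it so the callee's va_start need not spill FPRs.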
5610
5611 if (isTailCall)
5612 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5613 TailCallArguments);
5614
5615 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
5616 /* unused except on PPC64 ELFv1 */ false, DAG,
5617 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
5618 NumBytes, Ins, InVals, CS);
5619}
5620
5621// Copy an argument into memory, being careful to do this outside the
5622// call sequence for the call to which the argument belongs.
5623SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5624 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5625 SelectionDAG &DAG, const SDLoc &dl) const {
5626 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5627 CallSeqStart.getNode()->getOperand(0),
5628 Flags, DAG, dl);
5629 // The MEMCPY must go outside the CALLSEQ_START..END.
5630 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5631 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5632 SDLoc(MemcpyCall));
5633 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5634 NewCallSeqStart.getNode());
5635 return NewCallSeqStart;
5636}
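// The reason for the splice: the byval copy may itself be lowered to a libc
// memcpy call, and call sequences must not nest. Chaining the copy to
// CallSeqStart's incoming chain (operand 0) and rebuilding CALLSEQ_START on
// top of it places the copy strictly before the call sequence begins.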
5637
5638SDValue PPCTargetLowering::LowerCall_64SVR4(
5639 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
5640 bool isTailCall, bool isPatchPoint,
5641 const SmallVectorImpl<ISD::OutputArg> &Outs,
5642 const SmallVectorImpl<SDValue> &OutVals,
5643 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5644 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5645 ImmutableCallSite CS) const {
5646 bool isELFv2ABI = Subtarget.isELFv2ABI();
5647 bool isLittleEndian = Subtarget.isLittleEndian();
5648 unsigned NumOps = Outs.size();
5649 bool hasNest = false;
5650 bool IsSibCall = false;
5651
5652 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5653 unsigned PtrByteSize = 8;
5654
5655 MachineFunction &MF = DAG.getMachineFunction();
5656
5657 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5658 IsSibCall = true;
5659
5660 // Mark this function as potentially containing a call that is tail-call
5661 // optimized. As a consequence the frame pointer will be used for dynamic
5662 // stack allocation and for restoring the caller's stack pointer in this
5663 // function's epilogue: by tail calling, the called function might overwrite
5664 // the value in this function's (MF) stack pointer stack slot 0(SP).
5665 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5666 CallConv == CallingConv::Fast)
5667 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5668
5669 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
5670 "fastcc not supported on varargs functions");
5671
5672 // Count how many bytes are to be pushed on the stack, including the linkage
5673 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
5674 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5675 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5676 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5677 unsigned NumBytes = LinkageSize;
5678 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5679 unsigned &QFPR_idx = FPR_idx;
5680
5681 static const MCPhysReg GPR[] = {
5682 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5683 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5684 };
5685 static const MCPhysReg VR[] = {
5686 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5687 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5688 };
5689
5690 const unsigned NumGPRs = array_lengthof(GPR);
5691 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5692 const unsigned NumVRs = array_lengthof(VR);
5693 const unsigned NumQFPRs = NumFPRs;
5694
5695 // On ELFv2, we can avoid allocating the parameter area if all the arguments
5696 // can be passed to the callee in registers.
5697 // For the fast calling convention, there is another check below.
5698 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5699 bool HasParameterArea = !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast;
5700 if (!HasParameterArea) {
5701 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5702 unsigned AvailableFPRs = NumFPRs;
5703 unsigned AvailableVRs = NumVRs;
5704 unsigned NumBytesTmp = NumBytes;
5705 for (unsigned i = 0; i != NumOps; ++i) {
5706 if (Outs[i].Flags.isNest()) continue;
5707 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
5708 PtrByteSize, LinkageSize, ParamAreaSize,
5709 NumBytesTmp, AvailableFPRs, AvailableVRs,
5710 Subtarget.hasQPX()))
5711 HasParameterArea = true;
5712 }
5713 }
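// Example: nine i64 parameters on ELFv2 exceed the eight GPRs (X3-X10), so
// CalculateStackSlotUsed reports that the ninth needs a stack slot and
// HasParameterArea flips back to true despite the ELFv2 default.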
5714
5715 // When using the fast calling convention, we don't provide backing for
5716 // arguments that will be in registers.
5717 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
5718
5719 // Avoid allocating parameter area for fastcc functions if all the arguments
5720 // can be passed in the registers.
5721 if (CallConv == CallingConv::Fast)
5722 HasParameterArea = false;
5723
5724 // Add up all the space actually used.
5725 for (unsigned i = 0; i != NumOps; ++i) {
5726 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5727 EVT ArgVT = Outs[i].VT;
5728 EVT OrigVT = Outs[i].ArgVT;
5729
5730 if (Flags.isNest())
5731 continue;
5732
5733 if (CallConv == CallingConv::Fast) {
5734 if (Flags.isByVal()) {
5735 NumGPRsUsed += (Flags.getByValSize()+7)/8;
5736 if (NumGPRsUsed > NumGPRs)
5737 HasParameterArea = true;
5738 } else {
5739 switch (ArgVT.getSimpleVT().SimpleTy) {
5740 default: llvm_unreachable("Unexpected ValueType for argument!");
5741 case MVT::i1:
5742 case MVT::i32:
5743 case MVT::i64:
5744 if (++NumGPRsUsed <= NumGPRs)
5745 continue;
5746 break;
5747 case MVT::v4i32:
5748 case MVT::v8i16:
5749 case MVT::v16i8:
5750 case MVT::v2f64:
5751 case MVT::v2i64:
5752 case MVT::v1i128:
5753 case MVT::f128:
5754 if (++NumVRsUsed <= NumVRs)
5755 continue;
5756 break;
5757 case MVT::v4f32:
5758 // When using QPX, this is handled like a FP register, otherwise, it
5759 // is an Altivec register.
5760 if (Subtarget.hasQPX()) {
5761 if (++NumFPRsUsed <= NumFPRs)
5762 continue;
5763 } else {
5764 if (++NumVRsUsed <= NumVRs)
5765 continue;
5766 }
5767 break;
5768 case MVT::f32:
5769 case MVT::f64:
5770 case MVT::v4f64: // QPX
5771 case MVT::v4i1: // QPX
5772 if (++NumFPRsUsed <= NumFPRs)
5773 continue;
5774 break;
5775 }
5776 HasParameterArea = true;
5777 }
5778 }
5779
5780 /* Respect alignment of argument on the stack. */
5781 unsigned Align =
5782 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5783 NumBytes = ((NumBytes + Align - 1) / Align) * Align;
5784
5785 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5786 if (Flags.isInConsecutiveRegsLast())
5787 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5788 }
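// Worked example (ELFv2, LinkageSize = 32, hypothetical signature
// f(i64, f64, <4 x i32>)): the i64 and f64 each take an 8-byte, 8-aligned
// slot (32 -> 40 -> 48) and the vector a 16-byte, 16-aligned slot (48 -> 64),
// so NumBytes = 64 here, before the parameter-area minimum applied below.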
5789
5790 unsigned NumBytesActuallyUsed = NumBytes;
5791
5792 // In the old ELFv1 ABI,
5793 // the prolog code of the callee may store up to 8 GPR argument registers to
5794 // the stack, allowing va_start to index over them in memory if it is varargs.
5795 // Because we cannot tell if this is needed on the caller side, we have to
5796 // conservatively assume that it is needed. As such, make sure we have at
5797 // least enough stack space for the caller to store the 8 GPRs.
5798 // In the ELFv2 ABI, we allocate the parameter area iff a callee
5799 // really requires memory operands, e.g. a vararg function.
5800 if (HasParameterArea)
5801 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
5802 else
5803 NumBytes = LinkageSize;
5804
5805 // Tail call needs the stack to be aligned.
5806 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5807 CallConv == CallingConv::Fast)
5808 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5809
5810 int SPDiff = 0;
5811
5812 // Calculate by how many bytes the stack has to be adjusted in case of tail
5813 // call optimization.
5814 if (!IsSibCall)
5815 SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5816
5817 // To protect arguments on the stack from being clobbered in a tail call,
5818 // force all the loads to happen before doing any other lowering.
5819 if (isTailCall)
5820 Chain = DAG.getStackArgumentTokenFactor(Chain);
5821
5822 // Adjust the stack pointer for the new arguments...
5823 // These operations are automatically eliminated by the prolog/epilog pass
5824 if (!IsSibCall)
5825 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5826 SDValue CallSeqStart = Chain;
5827
5828 // Load the return address and frame pointer so they can be moved somewhere
5829 // else later.
5830 SDValue LROp, FPOp;
5831 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5832
5833 // Set up a copy of the stack pointer for use loading and storing any
5834 // arguments that may not fit in the registers available for argument
5835 // passing.
5836 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5837
5838 // Figure out which arguments are going to go in registers, and which in
5839 // memory. Also, if this is a vararg function, floating point operations
5840 // must be stored to our stack, and loaded into integer regs as well, if
5841 // any integer regs are available for argument passing.
5842 unsigned ArgOffset = LinkageSize;
5843
5844 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5845 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5846
5847 SmallVector<SDValue, 8> MemOpChains;
5848 for (unsigned i = 0; i != NumOps; ++i) {
5849 SDValue Arg = OutVals[i];
5850 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5851 EVT ArgVT = Outs[i].VT;
5852 EVT OrigVT = Outs[i].ArgVT;
5853
5854 // PtrOff will be used to store the current argument to the stack if a
5855 // register cannot be found for it.
5856 SDValue PtrOff;
5857
5858 // We re-align the argument offset for each argument, except under the
5859 // fast calling convention, where the re-alignment must happen only when
5860 // the argument will actually use a stack slot.
5861 auto ComputePtrOff = [&]() {
5862 /* Respect alignment of argument on the stack. */
5863 unsigned Align =
5864 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5865 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
5866
5867 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5868
5869 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5870 };
5871
5872 if (CallConv != CallingConv::Fast) {
5873 ComputePtrOff();
5874
5875 /* Compute GPR index associated with argument offset. */
5876 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
5877 GPR_idx = std::min(GPR_idx, NumGPRs);
5878 }
5879
5880 // Promote integers to 64-bit values.
5881 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
5882 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5883 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5884 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5885 }
5886
5887 // FIXME memcpy is used way more than necessary. Correctness first.
5888 // Note: "by value" is code for passing a structure by value, not
5889 // basic types.
5890 if (Flags.isByVal()) {
5891 // Note: Size includes alignment padding, so
5892 // struct x { short a; char b; }
5893 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
5894 // These are the proper values we need for right-justifying the
5895 // aggregate in a parameter register.
5896 unsigned Size = Flags.getByValSize();
5897
5898 // An empty aggregate parameter takes up no storage and no
5899 // registers.
5900 if (Size == 0)
5901 continue;
5902
5903 if (CallConv == CallingConv::Fast)
5904 ComputePtrOff();
5905
5906 // All aggregates smaller than 8 bytes must be passed right-justified.
5907 if (Size==1 || Size==2 || Size==4) {
5908 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
5909 if (GPR_idx != NumGPRs) {
5910 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
5911 MachinePointerInfo(), VT);
5912 MemOpChains.push_back(Load.getValue(1));
5913 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5914
5915 ArgOffset += PtrByteSize;
5916 continue;
5917 }
5918 }
5919
5920 if (GPR_idx == NumGPRs && Size < 8) {
5921 SDValue AddPtr = PtrOff;
5922 if (!isLittleEndian) {
5923 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
5924 PtrOff.getValueType());
5925 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5926 }
5927 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5928 CallSeqStart,
5929 Flags, DAG, dl);
5930 ArgOffset += PtrByteSize;
5931 continue;
5932 }
5933 // Copy entire object into memory. There are cases where gcc-generated
5934 // code assumes it is there, even if it could be put entirely into
5935 // registers. (This is not what the doc says.)
5936
5937 // FIXME: The above statement is likely due to a misunderstanding of the
5938 // documents. All arguments must be copied into the parameter area BY
5939 // THE CALLEE in the event that the callee takes the address of any
5940 // formal argument. That has not yet been implemented. However, it is
5941 // reasonable to use the stack area as a staging area for the register
5942 // load.
5943
5944 // Skip this for small aggregates, as we will use the same slot for a
5945 // right-justified copy, below.
5946 if (Size >= 8)
5947 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
5948 CallSeqStart,
5949 Flags, DAG, dl);
5950
5951 // When a register is available, pass a small aggregate right-justified.
5952 if (Size < 8 && GPR_idx != NumGPRs) {
5953 // The easiest way to get this right-justified in a register
5954 // is to copy the structure into the rightmost portion of a
5955 // local variable slot, then load the whole slot into the
5956 // register.
5957 // FIXME: The memcpy seems to produce pretty awful code for
5958 // small aggregates, particularly for packed ones.
5959 // FIXME: It would be preferable to use the slot in the
5960 // parameter save area instead of a new local variable.
5961 SDValue AddPtr = PtrOff;
5962 if (!isLittleEndian) {
5963 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
5964 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5965 }
5966 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5967 CallSeqStart,
5968 Flags, DAG, dl);
5969
5970 // Load the slot into the register.
5971 SDValue Load =
5972 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
5973 MemOpChains.push_back(Load.getValue(1));
5974 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5975
5976 // Done with this argument.
5977 ArgOffset += PtrByteSize;
5978 continue;
5979 }
5980
5981 // For aggregates larger than PtrByteSize, copy the pieces of the
5982 // object that fit into registers from the parameter save area.
5983 for (unsigned j=0; j<Size; j+=PtrByteSize) {
5984 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
5985 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
5986 if (GPR_idx != NumGPRs) {
5987 SDValue Load =
5988 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
5989 MemOpChains.push_back(Load.getValue(1));
5990 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5991 ArgOffset += PtrByteSize;
5992 } else {
5993 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
5994 break;
5995 }
5996 }
5997 continue;
5998 }
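// Example of the right-justification above: a 3-byte aggregate passed
// big-endian in a GPR must occupy the low-order bytes of its doubleword, so
// the copy lands at PtrOff + (8 - 3) = PtrOff + 5 and the whole doubleword
// is then loaded into the register.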
5999
6000 switch (Arg.getSimpleValueType().SimpleTy) {
6001 default: llvm_unreachable("Unexpected ValueType for argument!");
6002 case MVT::i1:
6003 case MVT::i32:
6004 case MVT::i64:
6005 if (Flags.isNest()) {
6006 // The 'nest' parameter, if any, is passed in R11.
6007 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6008 hasNest = true;
6009 break;
6010 }
6011
6012 // These can be scalar arguments or elements of an integer array type
6013 // passed directly. Clang may use those instead of "byval" aggregate
6014 // types to avoid forcing arguments to memory unnecessarily.
6015 if (GPR_idx != NumGPRs) {
6016 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6017 } else {
6018 if (CallConv == CallingConv::Fast)
6019 ComputePtrOff();
6020
6021 assert(HasParameterArea &&
6022 "Parameter area must exist to pass an argument in memory.");
6023 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6024 true, isTailCall, false, MemOpChains,
6025 TailCallArguments, dl);
6026 if (CallConv == CallingConv::Fast)
6027 ArgOffset += PtrByteSize;
6028 }
6029 if (CallConv != CallingConv::Fast)
6030 ArgOffset += PtrByteSize;
6031 break;
6032 case MVT::f32:
6033 case MVT::f64: {
6034 // These can be scalar arguments or elements of a float array type
6035 // passed directly. The latter are used to implement ELFv2 homogenous
6036 // float aggregates.
6037
6038 // Named arguments go into FPRs first, and once they overflow, the
6039 // remaining arguments go into GPRs and then the parameter save area.
6040 // Unnamed arguments for vararg functions always go to GPRs and
6041 // then the parameter save area. For now, put all arguments to vararg
6042 // routines always in both locations (FPR *and* GPR or stack slot).
6043 bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
6044 bool NeededLoad = false;
6045
6046 // First load the argument into the next available FPR.
6047 if (FPR_idx != NumFPRs)
6048 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6049
6050 // Next, load the argument into GPR or stack slot if needed.
6051 if (!NeedGPROrStack)
6052 ;
6053 else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
6054 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6055 // once we support fp <-> gpr moves.
6056
6057 // In the non-vararg case, this can only ever happen in the
6058 // presence of f32 array types, since otherwise we never run
6059 // out of FPRs before running out of GPRs.
6060 SDValue ArgVal;
6061
6062 // Double values are always passed in a single GPR.
6063 if (Arg.getValueType() != MVT::f32) {
6064 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6065
6066 // Non-array float values are extended and passed in a GPR.
6067 } else if (!Flags.isInConsecutiveRegs()) {
6068 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6069 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6070
6071 // If we have an array of floats, we collect every odd element
6072 // together with its predecessor into one GPR.
6073 } else if (ArgOffset % PtrByteSize != 0) {
6074 SDValue Lo, Hi;
6075 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6076 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6077 if (!isLittleEndian)
6078 std::swap(Lo, Hi);
6079 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6080
6081 // The final element, if even, goes into the first half of a GPR.
6082 } else if (Flags.isInConsecutiveRegsLast()) {
6083 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6084 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6085 if (!isLittleEndian)
6086 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6087 DAG.getConstant(32, dl, MVT::i32));
6088
6089 // Non-final even elements are skipped; they will be handled
6090 // together with the subsequent argument on the next go-around.
6091 } else
6092 ArgVal = SDValue();
6093
6094 if (ArgVal.getNode())
6095 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6096 } else {
6097 if (CallConv == CallingConv::Fast)
6098 ComputePtrOff();
6099
6100 // Single-precision floating-point values are mapped to the
6101 // second (rightmost) word of the stack doubleword.
6102 if (Arg.getValueType() == MVT::f32 &&
6103 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6104 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6105 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6106 }
6107
6108 assert(HasParameterArea &&
6109 "Parameter area must exist to pass an argument in memory.");
6110 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6111 true, isTailCall, false, MemOpChains,
6112 TailCallArguments, dl);
6113
6114 NeededLoad = true;
6115 }
6116 // When passing an array of floats, the array occupies consecutive
6117 // space in the argument area; only round up to the next doubleword
6118 // at the end of the array. Otherwise, each float takes 8 bytes.
6119 if (CallConv != CallingConv::Fast || NeededLoad) {
6120 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6121 Flags.isInConsecutiveRegs()) ? 4 : 8;
6122 if (Flags.isInConsecutiveRegsLast())
6123 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6124 }
6125 break;
6126 }
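// E.g. an ELFv2 homogeneous float aggregate "struct { float f[2]; }" that
// overflows into GPRs on big-endian: f[1] triggers the BUILD_PAIR branch,
// and after the endian swap f[0] occupies the most-significant half of the
// shared GPR, mirroring the aggregate's layout in memory.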
6127 case MVT::v4f32:
6128 case MVT::v4i32:
6129 case MVT::v8i16:
6130 case MVT::v16i8:
6131 case MVT::v2f64:
6132 case MVT::v2i64:
6133 case MVT::v1i128:
6134 case MVT::f128:
6135 if (!Subtarget.hasQPX()) {
6136 // These can be scalar arguments or elements of a vector array type
6137 // passed directly. The latter are used to implement ELFv2 homogenous
6138 // vector aggregates.
6139
6140 // For a varargs call, named arguments go into VRs or on the stack as
6141 // usual; unnamed arguments always go to the stack or the corresponding
6142 // GPRs when within range. For now, we always put the value in both
6143 // locations (or even all three).
6144 if (isVarArg) {
6145 assert(HasParameterArea &&
6146 "Parameter area must exist if we have a varargs call.");
6147 // We could elide this store in the case where the object fits
6148 // entirely in R registers. Maybe later.
6149 SDValue Store =
6150 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6151 MemOpChains.push_back(Store);
6152 if (VR_idx != NumVRs) {
6153 SDValue Load =
6154 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6155 MemOpChains.push_back(Load.getValue(1));
6156 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6157 }
6158 ArgOffset += 16;
6159 for (unsigned i=0; i<16; i+=PtrByteSize) {
6160 if (GPR_idx == NumGPRs)
6161 break;
6162 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6163 DAG.getConstant(i, dl, PtrVT));
6164 SDValue Load =
6165 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6166 MemOpChains.push_back(Load.getValue(1));
6167 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6168 }
6169 break;
6170 }
6171
6172 // Non-varargs Altivec params go into VRs or on the stack.
6173 if (VR_idx != NumVRs) {
6174 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6175 } else {
6176 if (CallConv == CallingConv::Fast)
6177 ComputePtrOff();
6178
6179 assert(HasParameterArea &&
6180 "Parameter area must exist to pass an argument in memory.");
6181 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6182 true, isTailCall, true, MemOpChains,
6183 TailCallArguments, dl);
6184 if (CallConv == CallingConv::Fast)
6185 ArgOffset += 16;
6186 }
6187
6188 if (CallConv != CallingConv::Fast)
6189 ArgOffset += 16;
6190 break;
6191 } // not QPX
6192
6193 assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
6194 "Invalid QPX parameter type");
6195 LLVM_FALLTHROUGH;
6196
6197 case MVT::v4f64:
6198 case MVT::v4i1: {
6199 bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
6200 if (isVarArg) {
6201 assert(HasParameterArea &&
6202 "Parameter area must exist if we have a varargs call.");
6203 // We could elide this store in the case where the object fits
6204 // entirely in R registers. Maybe later.
6205 SDValue Store =
6206 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6207 MemOpChains.push_back(Store);
6208 if (QFPR_idx != NumQFPRs) {
6209 SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store,
6210 PtrOff, MachinePointerInfo());
6211 MemOpChains.push_back(Load.getValue(1));
6212 RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
6213 }
6214 ArgOffset += (IsF32 ? 16 : 32);
6215 for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
6216 if (GPR_idx == NumGPRs)
6217 break;
6218 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6219 DAG.getConstant(i, dl, PtrVT));
6220 SDValue Load =
6221 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6222 MemOpChains.push_back(Load.getValue(1));
6223 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6224 }
6225 break;
6226 }
6227
6228 // Non-varargs QPX params go into registers or on the stack.
6229 if (QFPR_idx != NumQFPRs) {
6230 RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
6231 } else {
6232 if (CallConv == CallingConv::Fast)
6233 ComputePtrOff();
6234
6235 assert(HasParameterArea &&
6236 "Parameter area must exist to pass an argument in memory.");
6237 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6238 true, isTailCall, true, MemOpChains,
6239 TailCallArguments, dl);
6240 if (CallConv == CallingConv::Fast)
6241 ArgOffset += (IsF32 ? 16 : 32);
6242 }
6243
6244 if (CallConv != CallingConv::Fast)
6245 ArgOffset += (IsF32 ? 16 : 32);
6246 break;
6247 }
6248 }
6249 }
6250
6251 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6252 "mismatch in size of parameter area");
6253 (void)NumBytesActuallyUsed;
6254
6255 if (!MemOpChains.empty())
6256 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6257
6258 // Check if this is an indirect call (MTCTR/BCTRL).
6259 // See PrepareCall() for more information about calls through function
6260 // pointers in the 64-bit SVR4 ABI.
6261 if (!isTailCall && !isPatchPoint &&
6262 !isFunctionGlobalAddress(Callee) &&
6263 !isa<ExternalSymbolSDNode>(Callee)) {
6264 // Load r2 into a virtual register and store it to the TOC save area.
6265 setUsesTOCBasePtr(DAG);
6266 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6267 // TOC save area offset.
6268 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6269 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6270 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6271 Chain = DAG.getStore(
6272 Val.getValue(1), dl, Val, AddPtr,
6273 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
6274 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6275 // This does not mean the MTCTR instruction must use R12; it's easier
6276 // to model this as an extra parameter, so do that.
6277 if (isELFv2ABI && !isPatchPoint)
6278 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6279 }
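// Schematically, the indirect-call sequence arranged here is:
//   std 2, 24(1)       ; save the caller's TOC (ELFv2 offset shown)
//   mr 12, <fnptr>     ; ELFv2 convention: callee address in r12
//   mtctr 12
//   bctrl
//   ld 2, 24(1)        ; TOC restore, modeled by BCTRL_LOAD_TOC in FinishCall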
6280
6281 // Build a sequence of copy-to-reg nodes chained together with token chain
6282 // and flag operands which copy the outgoing args into the appropriate regs.
6283 SDValue InFlag;
6284 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6285 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6286 RegsToPass[i].second, InFlag);
6287 InFlag = Chain.getValue(1);
6288 }
6289
6290 if (isTailCall && !IsSibCall)
6291 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6292 TailCallArguments);
6293
6294 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest,
6295 DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee,
6296 SPDiff, NumBytes, Ins, InVals, CS);
6297}
6298
6299SDValue PPCTargetLowering::LowerCall_Darwin(
6300 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
6301 bool isTailCall, bool isPatchPoint,
6302 const SmallVectorImpl<ISD::OutputArg> &Outs,
6303 const SmallVectorImpl<SDValue> &OutVals,
6304 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6305 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6306 ImmutableCallSite CS) const {
6307 unsigned NumOps = Outs.size();
6308
6309 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6310 bool isPPC64 = PtrVT == MVT::i64;
6311 unsigned PtrByteSize = isPPC64 ? 8 : 4;
6312
6313 MachineFunction &MF = DAG.getMachineFunction();
6314
6315 // Mark this function as potentially containing a call that is tail-call
6316 // optimized. As a consequence the frame pointer will be used for dynamic
6317 // stack allocation and for restoring the caller's stack pointer in this
6318 // function's epilogue: by tail calling, the called function might overwrite
6319 // the value in this function's (MF) stack pointer stack slot 0(SP).
6320 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6321 CallConv == CallingConv::Fast)
6322 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6323
6324 // Count how many bytes are to be pushed on the stack, including the linkage
6325 // area, and parameter passing area. We start with 24/48 bytes, which is
6326 // prereserved space for [SP][CR][LR][3 x unused].
6327 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6328 unsigned NumBytes = LinkageSize;
6329
6330 // Add up all the space actually used.
6331 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
6332 // they all go in registers, but we must reserve stack space for them for
6333 // possible use by the caller. In varargs or 64-bit calls, parameters are
6334 // assigned stack space in order, with padding so Altivec parameters are
6335 // 16-byte aligned.
6336 unsigned nAltivecParamsAtEnd = 0;
6337 for (unsigned i = 0; i != NumOps; ++i) {
6338 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6339 EVT ArgVT = Outs[i].VT;
6340 // Varargs Altivec parameters are padded to a 16-byte boundary.
6341 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
6342 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
6343 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
6344 if (!isVarArg && !isPPC64) {
6345 // Non-varargs Altivec parameters go after all the non-Altivec
6346 // parameters; handle those later so we know how much padding we need.
6347 nAltivecParamsAtEnd++;
6348 continue;
6349 }
6350 // Varargs and 64-bit Altivec parameters are padded to a 16-byte boundary.
6351 NumBytes = ((NumBytes+15)/16)*16;
6352 }
6353 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6354 }
6355
6356 // Allow for Altivec parameters at the end, if needed.
6357 if (nAltivecParamsAtEnd) {
6358 NumBytes = ((NumBytes+15)/16)*16;
6359 NumBytes += 16*nAltivecParamsAtEnd;
6360 }
6361
6362 // The prolog code of the callee may store up to 8 GPR argument registers to
6363 // the stack, allowing va_start to index over them in memory if it is varargs.
6364 // Because we cannot tell if this is needed on the caller side, we have to
6365 // conservatively assume that it is needed. As such, make sure we have at
6366 // least enough stack space for the caller to store the 8 GPRs.
6367 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6368
6369 // Tail call needs the stack to be aligned.
6370 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6371 CallConv == CallingConv::Fast)
6372 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6373
6374 // Calculate by how many bytes the stack has to be adjusted in case of tail
6375 // call optimization.
6376 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
6377
6378 // To protect arguments on the stack from being clobbered in a tail call,
6379 // force all the loads to happen before doing any other lowering.
6380 if (isTailCall)
6381 Chain = DAG.getStackArgumentTokenFactor(Chain);
6382
6383 // Adjust the stack pointer for the new arguments...
6384 // These operations are automatically eliminated by the prolog/epilog pass
6385 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6386 SDValue CallSeqStart = Chain;
6387
6388 // Load the return address and frame pointer so they can be moved somewhere
6389 // else later.
6390 SDValue LROp, FPOp;
6391 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6392
6393 // Set up a copy of the stack pointer for use loading and storing any
6394 // arguments that may not fit in the registers available for argument
6395 // passing.
6396 SDValue StackPtr;
6397 if (isPPC64)
6398 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6399 else
6400 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6401
6402 // Figure out which arguments are going to go in registers, and which in
6403 // memory. Also, if this is a vararg function, floating point operations
6404 // must be stored to our stack, and loaded into integer regs as well, if
6405 // any integer regs are available for argument passing.
6406 unsigned ArgOffset = LinkageSize;
6407 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6408
6409 static const MCPhysReg GPR_32[] = { // 32-bit registers.
6410 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6411 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
6412 };
6413 static const MCPhysReg GPR_64[] = { // 64-bit registers.
6414 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6415 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6416 };
6417 static const MCPhysReg VR[] = {
6418 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6419 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6420 };
6421 const unsigned NumGPRs = array_lengthof(GPR_32);
6422 const unsigned NumFPRs = 13;
6423 const unsigned NumVRs = array_lengthof(VR);
6424
6425 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
6426
6427 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6428 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6429
6430 SmallVector<SDValue, 8> MemOpChains;
6431 for (unsigned i = 0; i != NumOps; ++i) {
6432 SDValue Arg = OutVals[i];
6433 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6434
6435 // PtrOff will be used to store the current argument to the stack if a
6436 // register cannot be found for it.
6437 SDValue PtrOff;
6438
6439 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6440
6441 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6442
6443 // On PPC64, promote integers to 64-bit values.
6444 if (isPPC64 && Arg.getValueType() == MVT::i32) {
6445 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6446 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6447 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6448 }
6449
6450 // FIXME memcpy is used way more than necessary. Correctness first.
6451 // Note: "by value" is code for passing a structure by value, not
6452 // basic types.
6453 if (Flags.isByVal()) {
6454 unsigned Size = Flags.getByValSize();
6455 // Very small objects are passed right-justified. Everything else is
6456 // passed left-justified.
6457 if (Size==1 || Size==2) {
6458 EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
6459 if (GPR_idx != NumGPRs) {
6460 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6461 MachinePointerInfo(), VT);
6462 MemOpChains.push_back(Load.getValue(1));
6463 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6464
6465 ArgOffset += PtrByteSize;
6466 } else {
6467 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6468 PtrOff.getValueType());
6469 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6470 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6471 CallSeqStart,
6472 Flags, DAG, dl);
6473 ArgOffset += PtrByteSize;
6474 }
6475 continue;
6476 }
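// E.g. "struct { char c; }" passed byval on Darwin: with a GPR free, the
// byte is EXTLOADed directly into the register; with none free, it is
// memcpy'd to PtrOff + (PtrByteSize - 1) so it sits right-justified in its
// word on the stack.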
6477 // Copy entire object into memory. There are cases where gcc-generated
6478 // code assumes it is there, even if it could be put entirely into
6479 // registers. (This is not what the doc says.)
6480 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6481 CallSeqStart,
6482 Flags, DAG, dl);
6483
6484 // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
6485 // copy the pieces of the object that fit into registers from the
6486 // parameter save area.
6487 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6488 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6489 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6490 if (GPR_idx != NumGPRs) {
6491 SDValue Load =
6492 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6493 MemOpChains.push_back(Load.getValue(1));
6494 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6495 ArgOffset += PtrByteSize;
6496 } else {
6497 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6498 break;
6499 }
6500 }
6501 continue;
6502 }
6503
6504 switch (Arg.getSimpleValueType().SimpleTy) {
6505 default: llvm_unreachable("Unexpected ValueType for argument!");
6506 case MVT::i1:
6507 case MVT::i32:
6508 case MVT::i64:
6509 if (GPR_idx != NumGPRs) {
6510 if (Arg.getValueType() == MVT::i1)
6511 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
6512
6513 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6514 } else {
6515 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6516 isPPC64, isTailCall, false, MemOpChains,
6517 TailCallArguments, dl);
6518 }
6519 ArgOffset += PtrByteSize;
6520 break;
6521 case MVT::f32:
6522 case MVT::f64:
6523 if (FPR_idx != NumFPRs) {
6524 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6525
6526 if (isVarArg) {
6527 SDValue Store =
6528 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6529 MemOpChains.push_back(Store);
6530
6531 // Float varargs are always shadowed in available integer registers
6532 if (GPR_idx != NumGPRs) {
6533 SDValue Load =
6534 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6535 MemOpChains.push_back(Load.getValue(1));
6536 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6537 }
6538 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
6539 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6540 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6541 SDValue Load =
6542 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6543 MemOpChains.push_back(Load.getValue(1));
6544 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6545 }
6546 } else {
6547 // If we have any FPRs remaining, we may also have GPRs remaining.
6548 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
6549 // GPRs.
6550 if (GPR_idx != NumGPRs)
6551 ++GPR_idx;
6552 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
6553 !isPPC64) // PPC64 has 64-bit GPR's obviously :)
6554 ++GPR_idx;
6555 }
6556 } else
6557 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6558 isPPC64, isTailCall, false, MemOpChains,
6559 TailCallArguments, dl);
6560 if (isPPC64)
6561 ArgOffset += 8;
6562 else
6563 ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
6564 break;
6565 case MVT::v4f32:
6566 case MVT::v4i32:
6567 case MVT::v8i16:
6568 case MVT::v16i8:
6569 if (isVarArg) {
6570 // These go aligned on the stack, or in the corresponding R registers
6571 // when within range. The Darwin PPC ABI doc claims they also go in
6572 // V registers; in fact gcc does this only for arguments that are
6573 // prototyped, not for those that match the ellipsis (...). We do it for all
6574 // arguments, which seems to work.
6575 while (ArgOffset % 16 !=0) {
6576 ArgOffset += PtrByteSize;
6577 if (GPR_idx != NumGPRs)
6578 GPR_idx++;
6579 }
6580 // We could elide this store in the case where the object fits
6581 // entirely in R registers. Maybe later.
6582 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
6583 DAG.getConstant(ArgOffset, dl, PtrVT));
6584 SDValue Store =
6585 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6586 MemOpChains.push_back(Store);
6587 if (VR_idx != NumVRs) {
6588 SDValue Load =
6589 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6590 MemOpChains.push_back(Load.getValue(1));
6591 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6592 }
6593 ArgOffset += 16;
6594 for (unsigned i=0; i<16; i+=PtrByteSize) {
6595 if (GPR_idx == NumGPRs)
6596 break;
6597 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6598 DAG.getConstant(i, dl, PtrVT));
6599 SDValue Load =
6600 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6601 MemOpChains.push_back(Load.getValue(1));
6602 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6603 }
6604 break;
6605 }
6606
6607 // Non-varargs Altivec params generally go in registers, but have
6608 // stack space allocated at the end.
6609 if (VR_idx != NumVRs) {
6610 // Doesn't have GPR space allocated.
6611 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6612 } else if (nAltivecParamsAtEnd==0) {
6613 // We are emitting Altivec params in order.
6614 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6615 isPPC64, isTailCall, true, MemOpChains,
6616 TailCallArguments, dl);
6617 ArgOffset += 16;
6618 }
6619 break;
6620 }
6621 }
6622 // If all Altivec parameters fit in registers, as they usually do,
6623 // they get stack space following the non-Altivec parameters. We
6624 // don't track this here because nobody below needs it.
6625 // If there are more Altivec parameters than fit in registers emit
6626 // the stores here.
6627 if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
6628 unsigned j = 0;
6629 // Offset is aligned; skip 1st 12 params which go in V registers.
6630 ArgOffset = ((ArgOffset+15)/16)*16;
6631 ArgOffset += 12*16;
6632 for (unsigned i = 0; i != NumOps; ++i) {
6633 SDValue Arg = OutVals[i];
6634 EVT ArgType = Outs[i].VT;
6635 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
6636 ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
6637 if (++j > NumVRs) {
6638 SDValue PtrOff;
6639 // We are emitting Altivec params in order.
6640 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6641 isPPC64, isTailCall, true, MemOpChains,
6642 TailCallArguments, dl);
6643 ArgOffset += 16;
6644 }
6645 }
6646 }
6647 }
6648
6649 if (!MemOpChains.empty())
6650 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6651
6652 // On Darwin, R12 must contain the address of an indirect callee. This does
6653 // not mean the MTCTR instruction must use R12; it's easier to model this as
6654 // an extra parameter, so do that.
6655 if (!isTailCall &&
6656 !isFunctionGlobalAddress(Callee) &&
6657 !isa<ExternalSymbolSDNode>(Callee) &&
6658 !isBLACompatibleAddress(Callee, DAG))
6659 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
6660 PPC::R12), Callee));
6661
6662 // Build a sequence of copy-to-reg nodes chained together with token chain
6663 // and flag operands which copy the outgoing args into the appropriate regs.
6664 SDValue InFlag;
6665 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6666 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6667 RegsToPass[i].second, InFlag);
6668 InFlag = Chain.getValue(1);
6669 }
6670
6671 if (isTailCall)
6672 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6673 TailCallArguments);
6674
6675 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
6676 /* unused except on PPC64 ELFv1 */ false, DAG,
6677 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
6678 NumBytes, Ins, InVals, CS);
6679}
6680
6681
6682SDValue PPCTargetLowering::LowerCall_AIX(
6683 SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
6684 bool isTailCall, bool isPatchPoint,
6685 const SmallVectorImpl<ISD::OutputArg> &Outs,
6686 const SmallVectorImpl<SDValue> &OutVals,
6687 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6688 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6689 ImmutableCallSite CS) const {
6690
6691 assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) &&
6692 "Unimplemented calling convention!");
6693 if (isVarArg || isPatchPoint)
6694 report_fatal_error("This call type is unimplemented on AIX.");
6695
6696 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6697 bool isPPC64 = PtrVT == MVT::i64;
6698 unsigned PtrByteSize = isPPC64 ? 8 : 4;
6699 unsigned NumOps = Outs.size();
6700
6701
6702 // Count how many bytes are to be pushed on the stack, including the linkage
6703 // area and the parameter list area.
6704 // On XCOFF, we start with 24/48, which is reserved space for
6705 // [SP][CR][LR][2 x reserved][TOC].
6706 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6707
6708 // The prolog code of the callee may store up to 8 GPR argument registers to
6709 // the stack, allowing va_start to index over them in memory if the callee
6710 // is variadic.
6711 // Because we cannot tell if this is needed on the caller side, we have to
6712 // conservatively assume that it is needed. As such, make sure we have at
6713 // least enough stack space for the caller to store the 8 GPRs.
6714 unsigned NumBytes = LinkageSize + 8 * PtrByteSize;
6715
6716 // Adjust the stack pointer for the new arguments...
6717 // These operations are automatically eliminated by the prolog/epilog
6718 // inserter pass.
6719 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6720 SDValue CallSeqStart = Chain;
6721
6722 static const MCPhysReg GPR_32[] = { // 32-bit registers.
6723 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6724 PPC::R7, PPC::R8, PPC::R9, PPC::R10
6725 };
6726 static const MCPhysReg GPR_64[] = { // 64-bit registers.
6727 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6728 PPC::X7, PPC::X8, PPC::X9, PPC::X10
6729 };
6730
6731 const unsigned NumGPRs = isPPC64 ? array_lengthof(GPR_64)
6732 : array_lengthof(GPR_32);
6733 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
6734 unsigned GPR_idx = 0;
6735
6736  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6737
6738 if (isTailCall)
6739 report_fatal_error("Handling of tail call is unimplemented!");
6740 int SPDiff = 0;
6741
6742 for (unsigned i = 0; i != NumOps; ++i) {
6743 SDValue Arg = OutVals[i];
6744 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6745
6746 // Promote integers if needed.
6747 if (Arg.getValueType() == MVT::i1 ||
6748 (isPPC64 && Arg.getValueType() == MVT::i32)) {
6749 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6750 Arg = DAG.getNode(ExtOp, dl, PtrVT, Arg);
6751 }
6752
6753 // Note: "by value" is code for passing a structure by value, not
6754 // basic types.
6755 if (Flags.isByVal())
6756 report_fatal_error("Passing structure by value is unimplemented!");
6757
6758 switch (Arg.getSimpleValueType().SimpleTy) {
6759 default: llvm_unreachable("Unexpected ValueType for argument!");
6760 case MVT::i1:
6761 case MVT::i32:
6762 case MVT::i64:
6763 if (GPR_idx != NumGPRs)
6764 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6765 else
6766 report_fatal_error("Handling of placing parameters on the stack is "
6767 "unimplemented!");
6768 break;
6769 case MVT::f32:
6770 case MVT::f64:
6771 case MVT::v4f32:
6772 case MVT::v4i32:
6773 case MVT::v8i16:
6774 case MVT::v16i8:
6775 case MVT::v2f64:
6776 case MVT::v2i64:
6777 case MVT::v1i128:
6778 case MVT::f128:
6779 case MVT::v4f64:
6780 case MVT::v4i1:
6781 report_fatal_error("Handling of this parameter type is unimplemented!");
6782 }
6783 }
6784
6785  if (!isFunctionGlobalAddress(Callee) &&
6786      !isa<ExternalSymbolSDNode>(Callee))
6787 report_fatal_error("Handling of indirect call is unimplemented!");
6788
6789 // Build a sequence of copy-to-reg nodes chained together with token chain
6790 // and flag operands which copy the outgoing args into the appropriate regs.
6791 SDValue InFlag;
6792 for (auto Reg : RegsToPass) {
6793 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
6794 InFlag = Chain.getValue(1);
6795 }
6796
6797 return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
6798 /* unused except on PPC64 ELFv1 */ false, DAG,
6799 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
6800 NumBytes, Ins, InVals, CS);
6801}
6802
6803bool
6804PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
6805                                  MachineFunction &MF, bool isVarArg,
6806                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
6807 LLVMContext &Context) const {
6808  SmallVector<CCValAssign, 16> RVLocs;
6809  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
6810 return CCInfo.CheckReturn(
6811 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
6812                ? RetCC_PPC_Cold
6813                : RetCC_PPC);
6814}
6815
6816SDValue
6817PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
6818 bool isVarArg,
6819                               const SmallVectorImpl<ISD::OutputArg> &Outs,
6820                               const SmallVectorImpl<SDValue> &OutVals,
6821 const SDLoc &dl, SelectionDAG &DAG) const {
6822  SmallVector<CCValAssign, 16> RVLocs;
6823  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
6824 *DAG.getContext());
6825 CCInfo.AnalyzeReturn(Outs,
6826 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
6827                           ? RetCC_PPC_Cold
6828                           : RetCC_PPC);
6829
6830 SDValue Flag;
6831 SmallVector<SDValue, 4> RetOps(1, Chain);
6832
6833 // Copy the result values into the output registers.
6834 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
6835 CCValAssign &VA = RVLocs[i];
6836 assert(VA.isRegLoc() && "Can only return in registers!");
6837
6838 SDValue Arg = OutVals[RealResIdx];
6839
6840 switch (VA.getLocInfo()) {
6841 default: llvm_unreachable("Unknown loc info!");
6842 case CCValAssign::Full: break;
6843 case CCValAssign::AExt:
6844 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
6845 break;
6846 case CCValAssign::ZExt:
6847 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
6848 break;
6849 case CCValAssign::SExt:
6850 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
6851 break;
6852 }
6853 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
6854 bool isLittleEndian = Subtarget.isLittleEndian();
6855 // Legalize ret f64 -> ret 2 x i32.
6856 SDValue SVal =
6857          DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6858                      DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
6859 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
6860 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
6861 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6862 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
6863 Flag = Chain.getValue(1);
6864 VA = RVLocs[++i]; // skip ahead to next loc
6865 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
6866 } else
6867 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
6868 Flag = Chain.getValue(1);
6869 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
6870 }
6871
6872 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
6873 const MCPhysReg *I =
6874 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
6875 if (I) {
6876 for (; *I; ++I) {
6877
6878 if (PPC::G8RCRegClass.contains(*I))
6879 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
6880 else if (PPC::F8RCRegClass.contains(*I))
6881 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
6882 else if (PPC::CRRCRegClass.contains(*I))
6883 RetOps.push_back(DAG.getRegister(*I, MVT::i1));
6884 else if (PPC::VRRCRegClass.contains(*I))
6885 RetOps.push_back(DAG.getRegister(*I, MVT::Other));
6886 else
6887 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
6888 }
6889 }
6890
6891 RetOps[0] = Chain; // Update chain.
6892
6893 // Add the flag if we have it.
6894 if (Flag.getNode())
6895 RetOps.push_back(Flag);
6896
6897 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
6898}
6899
6900SDValue
6901PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
6902 SelectionDAG &DAG) const {
6903 SDLoc dl(Op);
6904
6905 // Get the correct type for integers.
6906 EVT IntVT = Op.getValueType();
6907
6908 // Get the inputs.
6909 SDValue Chain = Op.getOperand(0);
6910 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
6911 // Build a DYNAREAOFFSET node.
6912 SDValue Ops[2] = {Chain, FPSIdx};
6913 SDVTList VTs = DAG.getVTList(IntVT);
6914 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
6915}
6916
6917SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
6918 SelectionDAG &DAG) const {
6919 // When we pop the dynamic allocation we need to restore the SP link.
6920 SDLoc dl(Op);
6921
6922 // Get the correct type for pointers.
6923 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6924
6925 // Construct the stack pointer operand.
6926 bool isPPC64 = Subtarget.isPPC64();
6927 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
6928 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
6929
6930 // Get the operands for the STACKRESTORE.
6931 SDValue Chain = Op.getOperand(0);
6932 SDValue SaveSP = Op.getOperand(1);
6933
6934 // Load the old link SP.
6935 SDValue LoadLinkSP =
6936 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
6937
6938 // Restore the stack pointer.
6939 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
6940
6941 // Store the old link SP.
6942 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
6943}
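
// A minimal sketch (assuming 32-bit PPC; register numbers are illustrative)
// of the sequence this lowering produces:
//   lwz r0, 0(r1)   ; load the old back-chain link from the current SP
//   mr  r1, r4      ; restore the saved stack pointer
//   stw r0, 0(r1)   ; store the back-chain link at the new SP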
6944
6945SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
6946  MachineFunction &MF = DAG.getMachineFunction();
6947  bool isPPC64 = Subtarget.isPPC64();
6948 EVT PtrVT = getPointerTy(MF.getDataLayout());
6949
6950  // Get the current return address save index. This is used primarily by
6951  // the lowering of RETURNADDR.
6952  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
6953  int RASI = FI->getReturnAddrSaveIndex();
6954
6955  // If the return address save index hasn't been defined yet.
6956 if (!RASI) {
6957    // Find out the fixed offset of the return address save area.
6958 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
6959    // Allocate the frame index for the return address save area.
6960 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
6961 // Save the result.
6962 FI->setReturnAddrSaveIndex(RASI);
6963 }
6964 return DAG.getFrameIndex(RASI, PtrVT);
6965}
6966
6967SDValue
6968PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
6969  MachineFunction &MF = DAG.getMachineFunction();
6970  bool isPPC64 = Subtarget.isPPC64();
6971 EVT PtrVT = getPointerTy(MF.getDataLayout());
6972
6973 // Get current frame pointer save index. The users of this index will be
6974 // primarily DYNALLOC instructions.
6975  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
6976  int FPSI = FI->getFramePointerSaveIndex();
6977
6978 // If the frame pointer save index hasn't been defined yet.
6979 if (!FPSI) {
6980    // Find out the fixed offset of the frame pointer save area.
6981 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
6982    // Allocate the frame index for the frame pointer save area.
6983 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
6984 // Save the result.
6985 FI->setFramePointerSaveIndex(FPSI);
6986 }
6987 return DAG.getFrameIndex(FPSI, PtrVT);
6988}
6989
6990SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
6991 SelectionDAG &DAG) const {
6992 // Get the inputs.
6993 SDValue Chain = Op.getOperand(0);
6994 SDValue Size = Op.getOperand(1);
6995 SDLoc dl(Op);
6996
6997 // Get the correct type for pointers.
6998 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6999 // Negate the size.
7000 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7001 DAG.getConstant(0, dl, PtrVT), Size);
7002 // Construct a node for the frame pointer save index.
7003 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7004 // Build a DYNALLOC node.
7005 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7006 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7007 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7008}
7009
7010SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7011 SelectionDAG &DAG) const {
7012  MachineFunction &MF = DAG.getMachineFunction();
7013
7014 bool isPPC64 = Subtarget.isPPC64();
7015 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7016
7017 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7018 return DAG.getFrameIndex(FI, PtrVT);
7019}
7020
7021SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7022 SelectionDAG &DAG) const {
7023 SDLoc DL(Op);
7024 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7025                     DAG.getVTList(MVT::i32, MVT::Other),
7026                     Op.getOperand(0), Op.getOperand(1));
7027}
7028
7029SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7030 SelectionDAG &DAG) const {
7031 SDLoc DL(Op);
7032  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7033                     Op.getOperand(0), Op.getOperand(1));
7034}
7035
7036SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7037 if (Op.getValueType().isVector())
7038 return LowerVectorLoad(Op, DAG);
7039
7040 assert(Op.getValueType() == MVT::i1 &&
7041 "Custom lowering only for i1 loads");
7042
7043 // First, load 8 bits into 32 bits, then truncate to 1 bit.
7044
7045 SDLoc dl(Op);
7046 LoadSDNode *LD = cast<LoadSDNode>(Op);
7047
7048 SDValue Chain = LD->getChain();
7049 SDValue BasePtr = LD->getBasePtr();
7050 MachineMemOperand *MMO = LD->getMemOperand();
7051
7052 SDValue NewLD =
7053 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7054 BasePtr, MVT::i8, MMO);
7055 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7056
7057 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7058 return DAG.getMergeValues(Ops, dl);
7059}
7060
7061SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7062 if (Op.getOperand(1).getValueType().isVector())
7063 return LowerVectorStore(Op, DAG);
7064
7065 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7066 "Custom lowering only for i1 stores");
7067
7068 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7069
7070 SDLoc dl(Op);
7071 StoreSDNode *ST = cast<StoreSDNode>(Op);
7072
7073 SDValue Chain = ST->getChain();
7074 SDValue BasePtr = ST->getBasePtr();
7075 SDValue Value = ST->getValue();
7076 MachineMemOperand *MMO = ST->getMemOperand();
7077
7078  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
7079                      Value);
7080 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7081}
7082
7083// FIXME: Remove this once the ANDI glue bug is fixed:
7084SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7085 assert(Op.getValueType() == MVT::i1 &&
7086 "Custom lowering only for i1 results");
7087
7088 SDLoc DL(Op);
7089 return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
7090 Op.getOperand(0));
7091}
7092
7093SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7094 SelectionDAG &DAG) const {
7095
7096 // Implements a vector truncate that fits in a vector register as a shuffle.
7097 // We want to legalize vector truncates down to where the source fits in
7098 // a vector register (and target is therefore smaller than vector register
7099 // size). At that point legalization will try to custom lower the sub-legal
7100 // result and get here - where we can contain the truncate as a single target
7101 // operation.
7102
7103 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7104 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7105 //
7106 // We will implement it for big-endian ordering as this (where x denotes
7107 // undefined):
7108 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7109 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7110 //
7111 // The same operation in little-endian ordering will be:
7112 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7113 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
7114
7115 assert(Op.getValueType().isVector() && "Vector type expected.");
7116
7117 SDLoc DL(Op);
7118 SDValue N1 = Op.getOperand(0);
7119 unsigned SrcSize = N1.getValueType().getSizeInBits();
7120 assert(SrcSize <= 128 && "Source must fit in an Altivec/VSX vector");
7121 SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
7122
7123 EVT TrgVT = Op.getValueType();
7124 unsigned TrgNumElts = TrgVT.getVectorNumElements();
7125 EVT EltVT = TrgVT.getVectorElementType();
7126 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7127 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7128
7129 // First list the elements we want to keep.
7130 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
7131 SmallVector<int, 16> ShuffV;
7132 if (Subtarget.isLittleEndian())
7133 for (unsigned i = 0; i < TrgNumElts; ++i)
7134 ShuffV.push_back(i * SizeMult);
7135 else
7136 for (unsigned i = 1; i <= TrgNumElts; ++i)
7137 ShuffV.push_back(i * SizeMult - 1);
7138
7139 // Populate the remaining elements with undefs.
7140 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
7141 // ShuffV.push_back(i + WideNumElts);
7142 ShuffV.push_back(WideNumElts + 1);
7143
7144 SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc);
7145 return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV);
7146}
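
// Worked example (illustrative): truncating v4i32 to v4i8. SrcSize is 128,
// so SizeMult = 128 / 32 = 4 and WideNumElts = 16, giving the masks
//   little-endian: ShuffV = < 0, 4, 8, 12, 17, 17, ..., 17 >
//   big-endian:    ShuffV = < 3, 7, 11, 15, 17, 17, ..., 17 >
// i.e. one byte is kept from each i32 lane and the remaining lanes of the
// v16i8 result are left undefined.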
7147
7148/// LowerSELECT_CC - Lower floating-point select_cc's into an fsel instruction
7149/// possible.
7150SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
7151 // Not FP? Not a fsel.
7152 if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
7153 !Op.getOperand(2).getValueType().isFloatingPoint())
7154 return Op;
7155
7156 // We might be able to do better than this under some circumstances, but in
7157 // general, fsel-based lowering of select is a finite-math-only optimization.
7158 // For more information, see section F.3 of the 2.06 ISA specification.
7159 if (!DAG.getTarget().Options.NoInfsFPMath ||
7160      !DAG.getTarget().Options.NoNaNsFPMath)
7161    return Op;
7162 // TODO: Propagate flags from the select rather than global settings.
7163 SDNodeFlags Flags;
7164 Flags.setNoInfs(true);
7165 Flags.setNoNaNs(true);
7166
7167 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
7168
7169 EVT ResVT = Op.getValueType();
7170 EVT CmpVT = Op.getOperand(0).getValueType();
7171 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7172 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
7173 SDLoc dl(Op);
7174
7175 // If the RHS of the comparison is a 0.0, we don't need to do the
7176 // subtraction at all.
7177 SDValue Sel1;
7178 if (isFloatingPointZero(RHS))
7179 switch (CC) {
7180 default: break; // SETUO etc aren't handled by fsel.
7181 case ISD::SETNE:
7182 std::swap(TV, FV);
7183      LLVM_FALLTHROUGH;
7184    case ISD::SETEQ:
7185 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7186 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7187 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7188 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
7189 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7190 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7191 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
7192 case ISD::SETULT:
7193 case ISD::SETLT:
7194 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
7195      LLVM_FALLTHROUGH;
7196    case ISD::SETOGE:
7197 case ISD::SETGE:
7198 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7199 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7200 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7201 case ISD::SETUGT:
7202 case ISD::SETGT:
7203 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
7204      LLVM_FALLTHROUGH;
7205    case ISD::SETOLE:
7206 case ISD::SETLE:
7207 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7208 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7209 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7210 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
7211 }
7212
7213 SDValue Cmp;
7214 switch (CC) {
7215 default: break; // SETUO etc aren't handled by fsel.
7216 case ISD::SETNE:
7217 std::swap(TV, FV);
7218    LLVM_FALLTHROUGH;
7219  case ISD::SETEQ:
7220 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7221 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7222 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7223 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7224 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
7225 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7226 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7227 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
7228 case ISD::SETULT:
7229 case ISD::SETLT:
7230 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7231 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7232 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7233 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
7234 case ISD::SETOGE:
7235 case ISD::SETGE:
7236 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7237 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7238 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7239 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7240 case ISD::SETUGT:
7241 case ISD::SETGT:
7242 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
7243 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7244 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7245 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
7246 case ISD::SETOLE:
7247 case ISD::SETLE:
7248 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
7249 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7250 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7251 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7252 }
7253 return Op;
7254}
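
// For example (illustrative), with the finite-math checks above satisfied,
//   %r = select (fcmp oge double %a, 0.0), double %t, double %f
// takes the isFloatingPointZero(RHS) path and lowers to a single
//   fsel fR, fA, fT, fF   ; fR = (fA >= 0.0) ? fT : fF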
7255
7256void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
7257 SelectionDAG &DAG,
7258 const SDLoc &dl) const {
7259 assert(Op.getOperand(0).getValueType().isFloatingPoint());
7260 SDValue Src = Op.getOperand(0);
7261 if (Src.getValueType() == MVT::f32)
7262 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
7263
7264 SDValue Tmp;
7265 switch (Op.getSimpleValueType().SimpleTy) {
7266 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
7267 case MVT::i32:
7268 Tmp = DAG.getNode(
7269 Op.getOpcode() == ISD::FP_TO_SINT
7270            ? PPCISD::FCTIWZ
7271            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
7272 dl, MVT::f64, Src);
7273 break;
7274 case MVT::i64:
7275 assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
7276 "i64 FP_TO_UINT is supported only with FPCVT");
7277 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
7278                                                        PPCISD::FCTIDUZ,
7279                      dl, MVT::f64, Src);
7280 break;
7281 }
7282
7283 // Convert the FP value to an int value through memory.
7284 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
7285 (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
7286 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
7287 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
7288 MachinePointerInfo MPI =
7289    MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
7290
7291 // Emit a store to the stack slot.
7292 SDValue Chain;
7293 if (i32Stack) {
7294    MachineFunction &MF = DAG.getMachineFunction();
7295    MachineMemOperand *MMO =
7296        MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
7297 SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
7298 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
7299 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
7300 } else
7301 Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI);
7302
7303 // Result is a load from the stack slot. If loading 4 bytes, make sure to
7304 // add in a bias on big endian.
7305 if (Op.getValueType() == MVT::i32 && !i32Stack) {
7306 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
7307 DAG.getConstant(4, dl, FIPtr.getValueType()));
7308 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
7309 }
7310
7311 RLI.Chain = Chain;
7312 RLI.Ptr = FIPtr;
7313 RLI.MPI = MPI;
7314}
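
// The net effect for, e.g., an f64 -> i32 fptosi with STFIWX available is a
// short store/load sequence through a stack temporary (illustrative):
//   fctiwz f0, f1        ; convert to a 32-bit integer in an FPR
//   stfiwx f0, 0, rTmp   ; store the integer word to the stack slot
//   lwz    r3, 0(rTmp)   ; the caller then reloads it as an integer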
7315
7316/// Custom lowers floating point to integer conversions to use
7317/// the direct move instructions available in ISA 2.07 to avoid the
7318/// need for load/store combinations.
7319SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
7320 SelectionDAG &DAG,
7321 const SDLoc &dl) const {
7322 assert(Op.getOperand(0).getValueType().isFloatingPoint());
7323 SDValue Src = Op.getOperand(0);
7324
7325 if (Src.getValueType() == MVT::f32)
7326 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
7327
7328 SDValue Tmp;
7329 switch (Op.getSimpleValueType().SimpleTy) {
7330 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
7331 case MVT::i32:
7332 Tmp = DAG.getNode(
7333 Op.getOpcode() == ISD::FP_TO_SINT
7334            ? PPCISD::FCTIWZ
7335            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
7336 dl, MVT::f64, Src);
7337 Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
7338 break;
7339 case MVT::i64:
7340 assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
7341 "i64 FP_TO_UINT is supported only with FPCVT");
7342 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
7343                                                        PPCISD::FCTIDUZ,
7344                      dl, MVT::f64, Src);
7345 Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
7346 break;
7347 }
7348 return Tmp;
7349}
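
// With direct moves the stack temporary disappears; an f64 -> i32 fptosi
// becomes roughly (illustrative):
//   xscvdpsxws vs0, vs1   ; fctiwz-style convert, result in a VSR
//   mfvsrwz    r3, vs0    ; move the word directly to a GPR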
7350
7351SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
7352 const SDLoc &dl) const {
7353
7354 // FP to INT conversions are legal for f128.
7355 if (EnableQuadPrecision && (Op->getOperand(0).getValueType() == MVT::f128))
7356 return Op;
7357
7358 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
7359 // PPC (the libcall is not available).
7360 if (Op.getOperand(0).getValueType() == MVT::ppcf128) {
7361 if (Op.getValueType() == MVT::i32) {
7362 if (Op.getOpcode() == ISD::FP_TO_SINT) {
7363        SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
7364                                 MVT::f64, Op.getOperand(0),
7365 DAG.getIntPtrConstant(0, dl));
7366        SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
7367                                 MVT::f64, Op.getOperand(0),
7368 DAG.getIntPtrConstant(1, dl));
7369
7370 // Add the two halves of the long double in round-to-zero mode.
7371 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
7372
7373 // Now use a smaller FP_TO_SINT.
7374 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
7375 }
7376 if (Op.getOpcode() == ISD::FP_TO_UINT) {
7377 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
7378 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
7379 SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
7380 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
7381 // FIXME: generated code sucks.
7382 // TODO: Are there fast-math-flags to propagate to this FSUB?
7383 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128,
7384 Op.getOperand(0), Tmp);
7385 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
7386 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True,
7387 DAG.getConstant(0x80000000, dl, MVT::i32));
7388 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
7389 Op.getOperand(0));
7390 return DAG.getSelectCC(dl, Op.getOperand(0), Tmp, True, False,
7391 ISD::SETGE);
7392 }
7393 }
7394
7395 return SDValue();
7396 }
7397
7398 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
7399 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
7400
7401 ReuseLoadInfo RLI;
7402 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
7403
7404 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
7405 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
7406}
7407
7408// We're trying to insert a regular store, S, and then a load, L. If the
7409// incoming value, O, is a load, we might just be able to have our load use the
7410// address used by O. However, we don't know if anything else will store to
7411// that address before we can load from it. To prevent this situation, we need
7412// to insert our load, L, into the chain as a peer of O. To do this, we give L
7413// the same chain operand as O, we create a token factor from the chain results
7414// of O and L, and we replace all uses of O's chain result with that token
7415// factor (see spliceIntoChain below for this last part).
7416bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
7417 ReuseLoadInfo &RLI,
7418 SelectionDAG &DAG,
7419 ISD::LoadExtType ET) const {
7420 SDLoc dl(Op);
7421 if (ET == ISD::NON_EXTLOAD &&
7422 (Op.getOpcode() == ISD::FP_TO_UINT ||
7423 Op.getOpcode() == ISD::FP_TO_SINT) &&
7424 isOperationLegalOrCustom(Op.getOpcode(),
7425 Op.getOperand(0).getValueType())) {
7426
7427 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
7428 return true;
7429 }
7430
7431 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
7432 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
7433 LD->isNonTemporal())
7434 return false;
7435 if (LD->getMemoryVT() != MemVT)
7436 return false;
7437
7438 RLI.Ptr = LD->getBasePtr();
7439 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
7440 assert(LD->getAddressingMode() == ISD::PRE_INC &&
7441 "Non-pre-inc AM on PPC?");
7442 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
7443 LD->getOffset());
7444 }
7445
7446 RLI.Chain = LD->getChain();
7447 RLI.MPI = LD->getPointerInfo();
7448 RLI.IsDereferenceable = LD->isDereferenceable();
7449 RLI.IsInvariant = LD->isInvariant();
7450 RLI.Alignment = LD->getAlignment();
7451 RLI.AAInfo = LD->getAAInfo();
7452 RLI.Ranges = LD->getRanges();
7453
7454 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
7455 return true;
7456}
7457
7458// Given the head of the old chain, ResChain, insert a token factor containing
7459// it and NewResChain, and make users of ResChain now be users of that token
7460// factor.
7461// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
7462void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
7463 SDValue NewResChain,
7464 SelectionDAG &DAG) const {
7465 if (!ResChain)
7466 return;
7467
7468 SDLoc dl(NewResChain);
7469
7470  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
7471                           NewResChain, DAG.getUNDEF(MVT::Other));
7472 assert(TF.getNode() != NewResChain.getNode() &&
7473 "A new TF really is required here");
7474
7475 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
7476 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
7477}
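
// Sketch of the splice (illustrative): if O's chain result had users U1 and
// U2, then
//   before: O.chain -> U1, U2
//   after:  TokenFactor(O.chain, L.chain) -> U1, U2
// The token factor is first built with a placeholder UNDEF operand and only
// then repointed at ResChain, so ReplaceAllUsesOfValueWith cannot rewrite the
// token factor's own operand and create a cycle.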
7478
7479/// Analyze the profitability of a direct move:
7480/// prefer a float load over an int load plus a direct move
7481/// when the loaded integer value has no integer uses.
7482bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
7483 SDNode *Origin = Op.getOperand(0).getNode();
7484 if (Origin->getOpcode() != ISD::LOAD)
7485 return true;
7486
7487  // On subtargets without LXSIBZX/LXSIHZX (e.g. Power8),
7488  // prefer a direct move if the memory size is 1 or 2 bytes.
7489 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
7490 if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
7491 return true;
7492
7493 for (SDNode::use_iterator UI = Origin->use_begin(),
7494 UE = Origin->use_end();
7495 UI != UE; ++UI) {
7496
7497 // Only look at the users of the loaded value.
7498 if (UI.getUse().get().getResNo() != 0)
7499 continue;
7500
7501 if (UI->getOpcode() != ISD::SINT_TO_FP &&
7502 UI->getOpcode() != ISD::UINT_TO_FP)
7503 return true;
7504 }
7505
7506 return false;
7507}
7508
7509/// Custom lowers integer to floating point conversions to use
7510/// the direct move instructions available in ISA 2.07 to avoid the
7511/// need for load/store combinations.
7512SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
7513 SelectionDAG &DAG,
7514 const SDLoc &dl) const {
7515 assert((Op.getValueType() == MVT::f32 ||
7516 Op.getValueType() == MVT::f64) &&
7517 "Invalid floating point type as target of conversion");
7518 assert(Subtarget.hasFPCVT() &&
7519 "Int to FP conversions with direct moves require FPCVT");
7520 SDValue FP;
7521 SDValue Src = Op.getOperand(0);
7522 bool SinglePrec = Op.getValueType() == MVT::f32;
7523 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
7524 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
7525 unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
7526 (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
7527
7528 if (WordInt) {
7529    FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
7530                     dl, MVT::f64, Src);
7531 FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
7532 }
7533 else {
7534 FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
7535 FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
7536 }
7537
7538 return FP;
7539}
7540
7541static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
7542
7543 EVT VecVT = Vec.getValueType();
7544 assert(VecVT.isVector() && "Expected a vector type.");
7545 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
7546
7547 EVT EltVT = VecVT.getVectorElementType();
7548 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7549 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7550
7551 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
7552 SmallVector<SDValue, 16> Ops(NumConcat);
7553 Ops[0] = Vec;
7554 SDValue UndefVec = DAG.getUNDEF(VecVT);
7555 for (unsigned i = 1; i < NumConcat; ++i)
7556 Ops[i] = UndefVec;
7557
7558 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
7559}
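
// For example (illustrative): widening a v4i16 (64 bits) produces
//   concat_vectors(v4i16 Vec, v4i16 undef) : v8i16
// so elements 0-3 of the 128-bit result carry Vec and the rest are undef.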
7560
7561SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
7562 const SDLoc &dl) const {
7563
7564 unsigned Opc = Op.getOpcode();
7565 assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP) &&
7566 "Unexpected conversion type");
7567 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
7568 "Supports conversions to v2f64/v4f32 only.");
7569
7570 bool SignedConv = Opc == ISD::SINT_TO_FP;
7571 bool FourEltRes = Op.getValueType() == MVT::v4f32;
7572
7573 SDValue Wide = widenVec(DAG, Op.getOperand(0), dl);
7574 EVT WideVT = Wide.getValueType();
7575 unsigned WideNumElts = WideVT.getVectorNumElements();
7576 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
7577
7578 SmallVector<int, 16> ShuffV;
7579 for (unsigned i = 0; i < WideNumElts; ++i)
7580 ShuffV.push_back(i + WideNumElts);
7581
7582 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
7583 int SaveElts = FourEltRes ? 4 : 2;
7584 if (Subtarget.isLittleEndian())
7585 for (int i = 0; i < SaveElts; i++)
7586 ShuffV[i * Stride] = i;
7587 else
7588 for (int i = 1; i <= SaveElts; i++)
7589 ShuffV[i * Stride - 1] = i - 1;
7590
7591 SDValue ShuffleSrc2 =
7592 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
7593 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
7594 unsigned ExtendOp =
7595 SignedConv ? (unsigned)PPCISD::SExtVElems : (unsigned)ISD::BITCAST;
7596
7597 SDValue Extend;
7598 if (!Subtarget.hasP9Altivec() && SignedConv) {
7599 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
7600 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
7601 DAG.getValueType(Op.getOperand(0).getValueType()));
7602 } else
7603 Extend = DAG.getNode(ExtendOp, dl, IntermediateVT, Arrange);
7604
7605 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
7606}
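
// Worked example (illustrative): sitofp v4i16 -> v4f32 on little-endian.
// widenVec yields a v8i16, the initial mask is <8,9,10,11,12,13,14,15>, and
// with Stride = 2 the loop overwrites every other slot, giving
//   ShuffV = < 0, 9, 1, 11, 2, 13, 3, 15 >
// which interleaves the four source halfwords with undef (signed) or zero
// (unsigned) elements ahead of the extend and the final [su]int_to_fp.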
7607
7608SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
7609 SelectionDAG &DAG) const {
7610 SDLoc dl(Op);
7611
7612 EVT InVT = Op.getOperand(0).getValueType();
7613 EVT OutVT = Op.getValueType();
7614 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
7615 isOperationCustom(Op.getOpcode(), InVT))
7616 return LowerINT_TO_FPVector(Op, DAG, dl);
7617
7618 // Conversions to f128 are legal.
7619 if (EnableQuadPrecision && (Op.getValueType() == MVT::f128))
7620 return Op;
7621
7622 if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
7623 if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
7624 return SDValue();
7625
7626 SDValue Value = Op.getOperand(0);
7627 // The values are now known to be -1 (false) or 1 (true). To convert this
7628 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
7629 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
7630    Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
7631
7632 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
7633
7634 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
7635
7636 if (Op.getValueType() != MVT::v4f64)
7637 Value = DAG.getNode(ISD::FP_ROUND, dl,
7638 Op.getValueType(), Value,
7639 DAG.getIntPtrConstant(1, dl));
7640 return Value;
7641 }
7642
7643 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
7644 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
7645 return SDValue();
7646
7647 if (Op.getOperand(0).getValueType() == MVT::i1)
7648 return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
7649 DAG.getConstantFP(1.0, dl, Op.getValueType()),
7650 DAG.getConstantFP(0.0, dl, Op.getValueType()));
7651
7652 // If we have direct moves, we can do all the conversion, skip the store/load
7653 // however, without FPCVT we can't do most conversions.
7654 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
7655 Subtarget.isPPC64() && Subtarget.hasFPCVT())
7656 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
7657
7658 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
7659 "UINT_TO_FP is supported only with FPCVT");
7660
7661 // If we have FCFIDS, then use it when converting to single-precision.
7662 // Otherwise, convert to double-precision and then round.
7663 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
7664 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
7665                                                            : PPCISD::FCFIDS)
7666                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
7667 : PPCISD::FCFID);
7668 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
7669 ? MVT::f32
7670 : MVT::f64;
7671
7672 if (Op.getOperand(0).getValueType() == MVT::i64) {
7673 SDValue SINT = Op.getOperand(0);
7674 // When converting to single-precision, we actually need to convert
7675 // to double-precision first and then round to single-precision.
7676 // To avoid double-rounding effects during that operation, we have
7677 // to prepare the input operand. Bits that might be truncated when
7678 // converting to double-precision are replaced by a bit that won't
7679 // be lost at this stage, but is below the single-precision rounding
7680 // position.
7681 //
7682 // However, if -enable-unsafe-fp-math is in effect, accept double
7683 // rounding to avoid the extra overhead.
7684 if (Op.getValueType() == MVT::f32 &&
7685 !Subtarget.hasFPCVT() &&
7686          !DAG.getTarget().Options.UnsafeFPMath) {
7687
7688 // Twiddle input to make sure the low 11 bits are zero. (If this
7689 // is the case, we are guaranteed the value will fit into the 53 bit
7690 // mantissa of an IEEE double-precision value without rounding.)
7691 // If any of those low 11 bits were not zero originally, make sure
7692 // bit 12 (value 2048) is set instead, so that the final rounding
7693 // to single-precision gets the correct result.
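      // For instance (illustrative): if the low 12 bits of SINT are 0x001,
      // then (SINT & 2047) + 2047 = 2048, so the OR sets bit 11 and the final
      // AND clears bits 0-10, leaving the "sticky" 0x800 encoding described
      // above.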
7694 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
7695 SINT, DAG.getConstant(2047, dl, MVT::i64));
7696 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
7697 Round, DAG.getConstant(2047, dl, MVT::i64));
7698 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
7699 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
7700 Round, DAG.getConstant(-2048, dl, MVT::i64));
7701
7702 // However, we cannot use that value unconditionally: if the magnitude
7703 // of the input value is small, the bit-twiddling we did above might
7704 // end up visibly changing the output. Fortunately, in that case, we
7705 // don't need to twiddle bits since the original input will convert
7706 // exactly to double-precision floating-point already. Therefore,
7707 // construct a conditional to use the original value if the top 11
7708 // bits are all sign-bit copies, and use the rounded value computed
7709 // above otherwise.
7710 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
7711 SINT, DAG.getConstant(53, dl, MVT::i32));
7712 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
7713 Cond, DAG.getConstant(1, dl, MVT::i64));
7714 Cond = DAG.getSetCC(dl, MVT::i32,
7715 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
7716
7717 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
7718 }
7719
7720 ReuseLoadInfo RLI;
7721 SDValue Bits;
7722
7723    MachineFunction &MF = DAG.getMachineFunction();
7724    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
7725 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
7726 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
7727 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7728 } else if (Subtarget.hasLFIWAX() &&
7729 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
7730 MachineMemOperand *MMO =
7731          MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7732                                  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7733 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7734      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
7735                                     DAG.getVTList(MVT::f64, MVT::Other),
7736                                     Ops, MVT::i32, MMO);
7737 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7738 } else if (Subtarget.hasFPCVT() &&
7739 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
7740 MachineMemOperand *MMO =
7741          MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7742                                  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7743 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7744      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
7745                                     DAG.getVTList(MVT::f64, MVT::Other),
7746                                     Ops, MVT::i32, MMO);
7747 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
7748 } else if (((Subtarget.hasLFIWAX() &&
7749 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
7750 (Subtarget.hasFPCVT() &&
7751 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
7752 SINT.getOperand(0).getValueType() == MVT::i32) {
7753 MachineFrameInfo &MFI = MF.getFrameInfo();
7754 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7755
7756 int FrameIdx = MFI.CreateStackObject(4, 4, false);
7757 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7758
7759 SDValue Store =
7760 DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
7761                       MachinePointerInfo::getFixedStack(
7762                           DAG.getMachineFunction(), FrameIdx));
7763
7764 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
7765 "Expected an i32 store");
7766
7767 RLI.Ptr = FIdx;
7768 RLI.Chain = Store;
7769 RLI.MPI =
7770          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7771      RLI.Alignment = 4;
7772
7773 MachineMemOperand *MMO =
7774          MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7775                                  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7776 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7777      Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
7778                                     PPCISD::LFIWZX : PPCISD::LFIWAX,
7779                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
7780 Ops, MVT::i32, MMO);
7781 } else
7782 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
7783
7784 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
7785
7786 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
7787 FP = DAG.getNode(ISD::FP_ROUND, dl,
7788 MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
7789 return FP;
7790 }
7791
7792 assert(Op.getOperand(0).getValueType() == MVT::i32 &&
7793 "Unhandled INT_TO_FP type in custom expander!");
7794 // Since we only generate this in 64-bit mode, we can take advantage of
7795 // 64-bit registers. In particular, sign extend the input value into the
7796  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack,
7797 // then lfd it and fcfid it.
7798  MachineFunction &MF = DAG.getMachineFunction();
7799  MachineFrameInfo &MFI = MF.getFrameInfo();
7800 EVT PtrVT = getPointerTy(MF.getDataLayout());
7801
7802 SDValue Ld;
7803 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
7804 ReuseLoadInfo RLI;
7805 bool ReusingLoad;
7806 if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
7807 DAG))) {
7808 int FrameIdx = MFI.CreateStackObject(4, 4, false);
7809 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7810
7811 SDValue Store =
7812 DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
7813                       MachinePointerInfo::getFixedStack(
7814                           DAG.getMachineFunction(), FrameIdx));
7815
7816 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
7817 "Expected an i32 store");
7818
7819 RLI.Ptr = FIdx;
7820 RLI.Chain = Store;
7821 RLI.MPI =
7822          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
7823      RLI.Alignment = 4;
7824 }
7825
7826 MachineMemOperand *MMO =
7827        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
7828                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
7829 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
7830 Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
7831                                 PPCISD::LFIWZX : PPCISD::LFIWAX,
7832                                 dl, DAG.getVTList(MVT::f64, MVT::Other),
7833 Ops, MVT::i32, MMO);
7834 if (ReusingLoad)
7835 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
7836 } else {
7837 assert(Subtarget.isPPC64() &&
7838 "i32->FP without LFIWAX supported only on PPC64");
7839
7840 int FrameIdx = MFI.CreateStackObject(8, 8, false);
7841 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7842
7843 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
7844 Op.getOperand(0));
7845
7846 // STD the extended value into the stack slot.
7847 SDValue Store = DAG.getStore(
7848 DAG.getEntryNode(), dl, Ext64, FIdx,
7849        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
7850
7851 // Load the value as a double.
7852 Ld = DAG.getLoad(
7853 MVT::f64, dl, Store, FIdx,
7854        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
7855  }
7856
7857 // FCFID it and return it.
7858 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
7859 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
7860 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
7861 DAG.getIntPtrConstant(0, dl));
7862 return FP;
7863}
7864
7865SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
7866 SelectionDAG &DAG) const {
7867 SDLoc dl(Op);
7868 /*
7869 The rounding mode is in bits 30:31 of FPSCR, and has the following
7870 settings:
7871 00 Round to nearest
7872 01 Round to 0
7873 10 Round to +inf
7874 11 Round to -inf
7875
7876 FLT_ROUNDS, on the other hand, expects the following:
7877 -1 Undefined
7878 0 Round to 0
7879 1 Round to nearest
7880 2 Round to +inf
7881 3 Round to -inf
7882
7883 To perform the conversion, we do:
7884 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
7885 */
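
  /*
   Checking the formula against all four FPSCR settings (illustrative):
     RN = 00 -> (0 ^ (3 >> 1)) = 1 (nearest)
     RN = 01 -> (1 ^ (2 >> 1)) = 0 (to 0)
     RN = 10 -> (2 ^ (1 >> 1)) = 2 (to +inf)
     RN = 11 -> (3 ^ (0 >> 1)) = 3 (to -inf)
  */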
7886
7887  MachineFunction &MF = DAG.getMachineFunction();
7888  EVT VT = Op.getValueType();
7889 EVT PtrVT = getPointerTy(MF.getDataLayout());
7890
7891 // Save FP Control Word to register
7892 EVT NodeTys[] = {
7893 MVT::f64, // return register
7894 MVT::Glue // unused in this context
7895 };
7896 SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
7897
7898 // Save FP register to stack slot
7899 int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false);
7900 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
7901 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot,
7902                               MachinePointerInfo());
7903
7904 // Load FP Control Word from low 32 bits of stack slot.
7905 SDValue Four = DAG.getConstant(4, dl, PtrVT);
7906 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
7907 SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo());
7908
7909 // Transform as necessary
7910 SDValue CWD1 =
7911 DAG.getNode(ISD::AND, dl, MVT::i32,
7912 CWD, DAG.getConstant(3, dl, MVT::i32));
7913 SDValue CWD2 =
7914 DAG.getNode(ISD::SRL, dl, MVT::i32,
7915 DAG.getNode(ISD::AND, dl, MVT::i32,
7916 DAG.getNode(ISD::XOR, dl, MVT::i32,
7917 CWD, DAG.getConstant(3, dl, MVT::i32)),
7918 DAG.getConstant(3, dl, MVT::i32)),
7919 DAG.getConstant(1, dl, MVT::i32));
7920
7921 SDValue RetVal =
7922 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
7923
7924 return DAG.getNode((VT.getSizeInBits() < 16 ?
7925 ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
7926}
7927
7928SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
7929 EVT VT = Op.getValueType();
7930 unsigned BitWidth = VT.getSizeInBits();
7931 SDLoc dl(Op);
7932 assert(Op.getNumOperands() == 3 &&
7933 VT == Op.getOperand(1).getValueType() &&
7934 "Unexpected SHL!");
7935
7936 // Expand into a bunch of logical ops. Note that these ops
7937 // depend on the PPC behavior for oversized shift amounts.
7938 SDValue Lo = Op.getOperand(0);
7939 SDValue Hi = Op.getOperand(1);
7940 SDValue Amt = Op.getOperand(2);
7941 EVT AmtVT = Amt.getValueType();
7942
7943 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
7944 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
7945 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
7946 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
7947 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
7948 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
7949 DAG.getConstant(-BitWidth, dl, AmtVT));
7950 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
7951 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
7952 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
7953 SDValue OutOps[] = { OutLo, OutHi };
7954 return DAG.getMergeValues(OutOps, dl);
7955}
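
// Worked example (illustrative): BitWidth = 32, Amt = 40. Tmp1 = 32 - 40 is
// an oversized shift count, so Tmp2 and Tmp3 are 0; Tmp5 = 40 - 32 = 8, so
// OutHi = Lo << 8 and OutLo = 0, exactly a 64-bit left shift by 40. This
// relies on PPC shifts producing 0 for oversized amounts instead of being
// undefined.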
7956
7957SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
7958 EVT VT = Op.getValueType();
7959 SDLoc dl(Op);
7960 unsigned BitWidth = VT.getSizeInBits();
7961 assert(Op.getNumOperands() == 3 &&
7962 VT == Op.getOperand(1).getValueType() &&
7963 "Unexpected SRL!");
7964
7965 // Expand into a bunch of logical ops. Note that these ops
7966 // depend on the PPC behavior for oversized shift amounts.
7967 SDValue Lo = Op.getOperand(0);
7968 SDValue Hi = Op.getOperand(1);
7969 SDValue Amt = Op.getOperand(2);
7970 EVT AmtVT = Amt.getValueType();
7971
7972 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
7973 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
7974 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
7975 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
7976 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
7977 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
7978 DAG.getConstant(-BitWidth, dl, AmtVT));
7979 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
7980 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
7981 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
7982 SDValue OutOps[] = { OutLo, OutHi };
7983 return DAG.getMergeValues(OutOps, dl);
7984}
7985
7986SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
7987 SDLoc dl(Op);
7988 EVT VT = Op.getValueType();
7989 unsigned BitWidth = VT.getSizeInBits();
7990 assert(Op.getNumOperands() == 3 &&
7991 VT == Op.getOperand(1).getValueType() &&
7992 "Unexpected SRA!");
7993
7994 // Expand into a bunch of logical ops, followed by a select_cc.
7995 SDValue Lo = Op.getOperand(0);
7996 SDValue Hi = Op.getOperand(1);
7997 SDValue Amt = Op.getOperand(2);
7998 EVT AmtVT = Amt.getValueType();
7999
8000 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8001 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8002 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8003 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8004 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8005 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8006 DAG.getConstant(-BitWidth, dl, AmtVT));
8007 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
8008 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
8009 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
8010 Tmp4, Tmp6, ISD::SETLE);
8011 SDValue OutOps[] = { OutLo, OutHi };
8012 return DAG.getMergeValues(OutOps, dl);
8013}
8014
8015//===----------------------------------------------------------------------===//
8016// Vector related lowering.
8017//
8018
8019/// BuildSplatI - Build a canonical splati of Val with an element size of
8020/// SplatSize. Cast the result to VT.
8021static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
8022 SelectionDAG &DAG, const SDLoc &dl) {
8023 assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
8024
8025 static const MVT VTys[] = { // canonical VT to use for each size.
8026    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
8027  };
8028
8029 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
8030
8031 // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
8032 if (Val == -1)
8033 SplatSize = 1;
8034
8035 EVT CanonicalVT = VTys[SplatSize-1];
8036
8037 // Build a canonical splat for this value.
8038 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
8039}
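
// For example (illustrative): BuildSplatI(-1, 4, MVT::v4i32, ...) is
// canonicalized to a one-byte splat, producing the single instruction
//   vspltisb vD, -1
// followed by a free bitcast back to v4i32.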
8040
8041/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
8042/// specified intrinsic ID.
8043static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
8044 const SDLoc &dl, EVT DestVT = MVT::Other) {
8045 if (DestVT == MVT::Other) DestVT = Op.getValueType();
8046 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8047 DAG.getConstant(IID, dl, MVT::i32), Op);
8048}
8049
8050/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
8051/// specified intrinsic ID.
8052static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
8053 SelectionDAG &DAG, const SDLoc &dl,
8054 EVT DestVT = MVT::Other) {
8055 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
8056 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8057 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
8058}
8059
8060/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
8061/// specified intrinsic ID.
8062static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
8063 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
8064 EVT DestVT = MVT::Other) {
8065 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
8066 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8067 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
8068}
8069
8070/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
8071/// amount. The result has the specified value type.
8072static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
8073 SelectionDAG &DAG, const SDLoc &dl) {
8074 // Force LHS/RHS to be the right type.
8075 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
8076 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
8077
8078 int Ops[16];
8079 for (unsigned i = 0; i != 16; ++i)
8080 Ops[i] = i + Amt;
8081 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
8082 return DAG.getNode(ISD::BITCAST, dl, VT, T);
8083}
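
// For example (illustrative): BuildVSLDOI(LHS, RHS, 4, MVT::v4i32, ...)
// builds the mask <4, 5, ..., 19>, matching the vsldoi semantics of taking
// 16 bytes from the concatenation LHS||RHS starting at byte offset 4.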
8084
8085/// Do we have an efficient pattern in a .td file for this node?
8086///
8087/// \param V - pointer to the BuildVectorSDNode being matched
8088/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
8089///
8090/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
8091/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
8092/// the opposite is true (expansion is beneficial) are:
8093/// - The node builds a vector out of integers that are not 32 or 64-bits
8094/// - The node builds a vector out of constants
8095/// - The node is a "load-and-splat"
8096/// In all other cases, we will choose to keep the BUILD_VECTOR.
8097static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
8098                                            bool HasDirectMove,
8099 bool HasP8Vector) {
8100 EVT VecVT = V->getValueType(0);
8101 bool RightType = VecVT == MVT::v2f64 ||
8102 (HasP8Vector && VecVT == MVT::v4f32) ||
8103 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
8104 if (!RightType)
8105 return false;
8106
8107 bool IsSplat = true;
8108 bool IsLoad = false;
8109 SDValue Op0 = V->getOperand(0);
8110
8111 // This function is called in a block that confirms the node is not a constant
8112 // splat. So a constant BUILD_VECTOR here means the vector is built out of
8113 // different constants.
8114 if (V->isConstant())
8115 return false;
8116 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
8117 if (V->getOperand(i).isUndef())
8118 return false;
8119 // We want to expand nodes that represent load-and-splat even if the
8120 // loaded value is a floating point truncation or conversion to int.
8121 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
8122 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
8123 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8124 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
8125 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8126 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
8127         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
8128      IsLoad = true;
8129 // If the operands are different or the input is not a load and has more
8130 // uses than just this BV node, then it isn't a splat.
8131 if (V->getOperand(i) != Op0 ||
8132 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
8133 IsSplat = false;
8134 }
8135 return !(IsSplat && IsLoad);
8136}
8137
8138// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
8139SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
8140
8141 SDLoc dl(Op);
8142 SDValue Op0 = Op->getOperand(0);
8143
8144 if (!EnableQuadPrecision ||
8145 (Op.getValueType() != MVT::f128 ) ||
8146 (Op0.getOpcode() != ISD::BUILD_PAIR) ||
8147 (Op0.getOperand(0).getValueType() != MVT::i64) ||
8148 (Op0.getOperand(1).getValueType() != MVT::i64))
8149 return SDValue();
8150
8151 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
8152 Op0.getOperand(1));
8153}
8154
8155// If this is a case we can't handle, return null and let the default
8156// expansion code take care of it. If we CAN select this case, and if it
8157// selects to a single instruction, return Op. Otherwise, if we can codegen
8158// this case more efficiently than a constant pool load, lower it to the
8159// sequence of ops that should be used.
8160SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
8161 SelectionDAG &DAG) const {
8162 SDLoc dl(Op);
8163 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
8164 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
8165
8166 if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
8167 // We first build an i32 vector, load it into a QPX register,
8168 // then convert it to a floating-point vector and compare it
8169 // to a zero vector to get the boolean result.
8170    MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
8171    int FrameIdx = MFI.CreateStackObject(16, 16, false);
8172 MachinePointerInfo PtrInfo =
8173        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8174    EVT PtrVT = getPointerTy(DAG.getDataLayout());
8175 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8176
8177 assert(BVN->getNumOperands() == 4 &&
8178 "BUILD_VECTOR for v4i1 does not have 4 operands");
8179
8180 bool IsConst = true;
8181 for (unsigned i = 0; i < 4; ++i) {
8182 if (BVN->getOperand(i).isUndef()) continue;
8183 if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
8184 IsConst = false;
8185 break;
8186 }
8187 }
8188
8189 if (IsConst) {
8190 Constant *One =
8191        ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
8192      Constant *NegOne =
8193        ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
8194
8195 Constant *CV[4];
8196 for (unsigned i = 0; i < 4; ++i) {
8197 if (BVN->getOperand(i).isUndef())
8198          CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
8199        else if (isNullConstant(BVN->getOperand(i)))
8200 CV[i] = NegOne;
8201 else
8202 CV[i] = One;
8203 }
8204
8205      Constant *CP = ConstantVector::get(CV);
8206      SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
8207 16 /* alignment */);
8208
8209 SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
8210 SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
8211 return DAG.getMemIntrinsicNode(
8212 PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
8213          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
8214    }
8215
8216    SmallVector<SDValue, 4> Stores;
8217    for (unsigned i = 0; i < 4; ++i) {
8218 if (BVN->getOperand(i).isUndef()) continue;
8219
8220 unsigned Offset = 4*i;
8221 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
8222 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
8223
8224 unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
8225 if (StoreSize > 4) {
8226 Stores.push_back(
8227 DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx,
8228 PtrInfo.getWithOffset(Offset), MVT::i32));
8229 } else {
8230 SDValue StoreValue = BVN->getOperand(i);
8231 if (StoreSize < 4)
8232 StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
8233
8234 Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx,
8235 PtrInfo.getWithOffset(Offset)));
8236 }
8237 }
8238
8239 SDValue StoreChain;
8240 if (!Stores.empty())
8241 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
8242 else
8243 StoreChain = DAG.getEntryNode();
8244
8245 // Now load from v4i32 into the QPX register; this will extend it to
8246 // v4i64 but not yet convert it to a floating point. Nevertheless, this
8247 // is typed as v4f64 because the QPX register integer states are not
8248 // explicitly represented.
8249
8250 SDValue Ops[] = {StoreChain,
8251 DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32),
8252 FIdx};
8253 SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other});
8254
8255 SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
8256 dl, VTs, Ops, MVT::v4i32, PtrInfo);
8257 LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
8258 DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
8259 LoadedVect);
8260
8261 SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64);
8262
8263 return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
8264 }
8265
8266 // All other QPX vectors are handled by generic code.
8267 if (Subtarget.hasQPX())
8268 return SDValue();
8269
8270 // Check if this is a splat of a constant value.
8271 APInt APSplatBits, APSplatUndef;
8272 unsigned SplatBitSize;
8273 bool HasAnyUndefs;
8274 if (!BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
8275 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
8276 SplatBitSize > 32) {
8277 // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
8278 // lowered to VSX instructions under certain conditions.
8279 // Without VSX, there is no pattern more efficient than expanding the node.
8280 if (Subtarget.hasVSX() &&
8281 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
8282 Subtarget.hasP8Vector()))
8283 return Op;
8284 return SDValue();
8285 }
8286
8287 unsigned SplatBits = APSplatBits.getZExtValue();
8288 unsigned SplatUndef = APSplatUndef.getZExtValue();
8289 unsigned SplatSize = SplatBitSize / 8;
8290
8291 // First, handle single instruction cases.
8292
8293 // All zeros?
8294 if (SplatBits == 0) {
8295 // Canonicalize all zero vectors to be v4i32.
8296 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
8297 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
8298 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
8299 }
8300 return Op;
8301 }
8302
8303 // We have XXSPLTIB for constant splats one byte wide
8304 if (Subtarget.hasP9Vector() && SplatSize == 1) {
8305 // This is a splat of 1-byte elements with some elements potentially undef.
8306 // Rather than trying to match undef in the SDAG patterns, ensure that all
8307 // elements are the same constant.
8308 if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) {
8309 SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits,
8310 dl, MVT::i32));
8311 SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
8312 if (Op.getValueType() != MVT::v16i8)
8313 return DAG.getBitcast(Op.getValueType(), NewBV);
8314 return NewBV;
8315 }
8316
8317 // BuildVectorSDNode::isConstantSplat() is actually pretty smart. It'll
8318 // detect that constant splats like v8i16: 0xABAB are really just splats
8319 // of a 1-byte constant. In this case, we need to convert the node to a
8320 // splat of v16i8 and a bitcast.
8321 if (Op.getValueType() != MVT::v16i8)
8322 return DAG.getBitcast(Op.getValueType(),
8323 DAG.getConstant(SplatBits, dl, MVT::v16i8));
8324
8325 return Op;
8326 }
8327
8328 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
8329 int32_t SextVal = (int32_t(SplatBits << (32-SplatBitSize)) >>
8330 (32-SplatBitSize));
8331 if (SextVal >= -16 && SextVal <= 15)
8332 return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
8333
8334 // Two instruction sequences.
8335
8336 // If this value is in the range [-32,30] and is even, use:
8337 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
8338 // If this value is in the range [17,31] and is odd, use:
8339 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
8340 // If this value is in the range [-31,-17] and is odd, use:
8341 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
8342 // Note the last two are three-instruction sequences.
8343 if (SextVal >= -32 && SextVal <= 31) {
8344 // To avoid having these optimizations undone by constant folding,
8345 // we convert to a pseudo that will be expanded later into one of
8346 // the above forms.
8347 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
8348 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
8349 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
8350 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
8351 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
8352 if (VT == Op.getValueType())
8353 return RetVal;
8354 else
8355 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
8356 }
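// Illustrative aside (plain C++ checks, not part of the original file):
// the three VADD_SPLAT expansions listed above are exact integer
// identities for their respective ranges.
static_assert(18 == 18 / 2 + 18 / 2, "even value in [-32,30]");
static_assert(27 == (27 - 16) - (-16), "odd value in [17,31]");
static_assert(-27 == (-27 + 16) + (-16), "odd value in [-31,-17]");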
8357
8358 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
8359 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
8360 // for fneg/fabs.
8361 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
8362 // Make -1 and vspltisw -1:
8363 SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
8364
8365 // Make the VSLW intrinsic, computing 0x8000_0000.
8366 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
8367 OnesV, DAG, dl);
8368
8369 // xor by OnesV to invert it.
8370 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
8371 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8372 }
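// Illustrative aside (plain C++ checks, not part of the original file):
// vspltisw -1 makes an all-ones word; vslw shifts each word left by the
// low 5 bits of the shift operand (here 31), producing 0x8000_0000; the
// final xor with all-ones flips that into 0x7FFF_FFFF.
static_assert((0xFFFFFFFFu << (0xFFFFFFFFu & 31)) == 0x80000000u,
              "vspltisw(-1) + vslw self");
static_assert((0x80000000u ^ 0xFFFFFFFFu) == 0x7FFFFFFFu, "xor with ones");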
8373
8374 // Check to see if this matches one of the many 'vsplti + binop self' cases.
8375 static const signed char SplatCsts[] = {
8376 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
8377 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
8378 };
8379
8380 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
8381 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
8382 // cases which are ambiguous (e.g. formation of 0x8000_0000).
8383 int i = SplatCsts[idx];
8384
8385 // Figure out what shift amount will be used by altivec if shifted by i in
8386 // this splat size.
8387 unsigned TypeShiftAmt = i & (SplatBitSize-1);
8388
8389 // vsplti + shl self.
8390 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
8391 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
8392 static const unsigned IIDs[] = { // Intrinsic to use for each size.
8393 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
8394 Intrinsic::ppc_altivec_vslw
8395 };
8396 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8397 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8398 }
8399
8400 // vsplti + srl self.
8401 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
8402 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
8403 static const unsigned IIDs[] = { // Intrinsic to use for each size.
8404 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
8405 Intrinsic::ppc_altivec_vsrw
8406 };
8407 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8408 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8409 }
8410
8411 // vsplti + sra self.
8412 if (SextVal == ((int)i >> TypeShiftAmt)) {
8413 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
8414 static const unsigned IIDs[] = { // Intrinsic to use for each size.
8415 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
8416 Intrinsic::ppc_altivec_vsraw
8417 };
8418 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8419 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8420 }
8421
8422 // vsplti + rol self.
8423 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
8424 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
8425 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
8426 static const unsigned IIDs[] = { // Intrinsic to use for each size.
8427 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
8428 Intrinsic::ppc_altivec_vrlw
8429 };
8430 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8431 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8432 }
8433
8434 // t = vsplti c, result = vsldoi t, t, 1
8435 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
8436 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
8437 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
8438 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8439 }
8440 // t = vsplti c, result = vsldoi t, t, 2
8441 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
8442 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
8443 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
8444 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8445 }
8446 // t = vsplti c, result = vsldoi t, t, 3
8447 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
8448 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
8449 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
8450 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8451 }
8452 }
8453
8454 return SDValue();
8455}
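// Illustrative sketch (not part of the original file): the SextVal
// computation in LowerBUILD_VECTOR above sign-extends the low SplatBitSize
// bits of the splat constant by shifting them to the top of a 32-bit word
// and arithmetic-shifting back down. The right shift of a negative value
// is implementation-defined before C++20, but is arithmetic on every
// platform LLVM builds for.
static inline int32_t signExtendSplatValue(uint32_t SplatBits,
                                           unsigned SplatBitSize) {
  // e.g. signExtendSplatValue(0xFFF0, 16) == -16 (in VSPLTIH range),
  //      signExtendSplatValue(0x000F, 16) == 15.
  return int32_t(SplatBits << (32 - SplatBitSize)) >> (32 - SplatBitSize);
}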
8456
8457/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8458/// the specified operations to build the shuffle.
8459static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
8460 SDValue RHS, SelectionDAG &DAG,
8461 const SDLoc &dl) {
8462 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8463 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8464 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8465
8466 enum {
8467 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8468 OP_VMRGHW,
8469 OP_VMRGLW,
8470 OP_VSPLTISW0,
8471 OP_VSPLTISW1,
8472 OP_VSPLTISW2,
8473 OP_VSPLTISW3,
8474 OP_VSLDOI4,
8475 OP_VSLDOI8,
8476 OP_VSLDOI12
8477 };
8478
8479 if (OpNum == OP_COPY) {
8480 if (LHSID == (1*9+2)*9+3) return LHS;
8481 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8482 return RHS;
8483 }
8484
8485 SDValue OpLHS, OpRHS;
8486 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
8487 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
8488
8489 int ShufIdxs[16];
8490 switch (OpNum) {
8491 default: llvm_unreachable("Unknown i32 permute!");
8492 case OP_VMRGHW:
8493 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
8494 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
8495 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
8496 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
8497 break;
8498 case OP_VMRGLW:
8499 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
8500 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
8501 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
8502 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
8503 break;
8504 case OP_VSPLTISW0:
8505 for (unsigned i = 0; i != 16; ++i)
8506 ShufIdxs[i] = (i&3)+0;
8507 break;
8508 case OP_VSPLTISW1:
8509 for (unsigned i = 0; i != 16; ++i)
8510 ShufIdxs[i] = (i&3)+4;
8511 break;
8512 case OP_VSPLTISW2:
8513 for (unsigned i = 0; i != 16; ++i)
8514 ShufIdxs[i] = (i&3)+8;
8515 break;
8516 case OP_VSPLTISW3:
8517 for (unsigned i = 0; i != 16; ++i)
8518 ShufIdxs[i] = (i&3)+12;
8519 break;
8520 case OP_VSLDOI4:
8521 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
8522 case OP_VSLDOI8:
8523 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
8524 case OP_VSLDOI12:
8525 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
8526 }
8527 EVT VT = OpLHS.getValueType();
8528 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
8529 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
8530 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
8531 return DAG.getNode(ISD::BITCAST, dl, VT, T);
8532}
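// Illustrative sketch (not part of the original file): the field layout of
// a perfect-shuffle table entry, as decoded in GeneratePerfectShuffle
// above and in LowerVECTOR_SHUFFLE below (which reads the cost from bits
// 31:30). The struct and helper names are hypothetical.
struct PerfectShuffleEntryFields {
  unsigned Cost;  // bits 31:30 -- number of instructions needed
  unsigned OpNum; // bits 29:26 -- one of the OP_* values above
  unsigned LHSID; // bits 25:13 -- table index describing the LHS operand
  unsigned RHSID; // bits 12:0  -- table index describing the RHS operand
};
static inline PerfectShuffleEntryFields decodePFEntry(unsigned PFEntry) {
  return {PFEntry >> 30, (PFEntry >> 26) & 0x0F,
          (PFEntry >> 13) & ((1u << 13) - 1), PFEntry & ((1u << 13) - 1)};
}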
8533
8534/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
8535/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
8536/// SDValue.
8537SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
8538 SelectionDAG &DAG) const {
8539 const unsigned BytesInVector = 16;
8540 bool IsLE = Subtarget.isLittleEndian();
8541 SDLoc dl(N);
8542 SDValue V1 = N->getOperand(0);
8543 SDValue V2 = N->getOperand(1);
8544 unsigned ShiftElts = 0, InsertAtByte = 0;
8545 bool Swap = false;
8546
8547 // Shifts required to get the byte we want at element 7.
8548 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
8549 0, 15, 14, 13, 12, 11, 10, 9};
8550 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
8551 1, 2, 3, 4, 5, 6, 7, 8};
8552
8553 ArrayRef<int> Mask = N->getMask();
8554 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
8555
8556 // For each mask element, find out if we're just inserting something
8557 // from V2 into V1 or vice versa.
8558 // Possible permutations inserting an element from V2 into V1:
8559 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
8560 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
8561 // ...
8562 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
8563 // Inserting from V1 into V2 will be similar, except mask range will be
8564 // [16,31].
8565
8566 bool FoundCandidate = false;
8567 // If both vector operands for the shuffle are the same vector, the mask
8568 // will contain only elements from the first one and the second one will be
8569 // undef.
8570 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
8571 // Go through the mask of bytes to find an element that's being moved
8572 // from one vector to the other.
8573 for (unsigned i = 0; i < BytesInVector; ++i) {
8574 unsigned CurrentElement = Mask[i];
8575 // If 2nd operand is undefined, we should only look for element 7 in the
8576 // Mask.
8577 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
8578 continue;
8579
8580 bool OtherElementsInOrder = true;
8581 // Examine the other elements in the Mask to see if they're in original
8582 // order.
8583 for (unsigned j = 0; j < BytesInVector; ++j) {
8584 if (j == i)
8585 continue;
8586 // If CurrentElement is from V1 [0,15], we expect the rest of the Mask to
8587 // be from V2 [16,31] and vice versa, unless the 2nd operand is undefined,
8588 // in which case we assume we're always picking from the 1st operand.
8589 int MaskOffset =
8590 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
8591 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
8592 OtherElementsInOrder = false;
8593 break;
8594 }
8595 }
8596 // If other elements are in original order, we record the number of shifts
8597 // we need to get the element we want into element 7. Also record which byte
8598 // in the vector we should insert into.
8599 if (OtherElementsInOrder) {
8600 // If 2nd operand is undefined, we assume no shifts and no swapping.
8601 if (V2.isUndef()) {
8602 ShiftElts = 0;
8603 Swap = false;
8604 } else {
8605 // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
8606 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
8607 : BigEndianShifts[CurrentElement & 0xF];
8608 Swap = CurrentElement < BytesInVector;
8609 }
8610 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
8611 FoundCandidate = true;
8612 break;
8613 }
8614 }
8615
8616 if (!FoundCandidate)
8617 return SDValue();
8618
8619 // Candidate found, construct the proper SDAG sequence with VINSERTB,
8620 // optionally with VECSHL if shift is required.
8621 if (Swap)
8622 std::swap(V1, V2);
8623 if (V2.isUndef())
8624 V2 = V1;
8625 if (ShiftElts) {
8626 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
8627 DAG.getConstant(ShiftElts, dl, MVT::i32));
8628 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
8629 DAG.getConstant(InsertAtByte, dl, MVT::i32));
8630 }
8631 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
8632 DAG.getConstant(InsertAtByte, dl, MVT::i32));
8633}
8634
8635/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
8636/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
8637/// SDValue.
8638SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
8639 SelectionDAG &DAG) const {
8640 const unsigned NumHalfWords = 8;
8641 const unsigned BytesInVector = NumHalfWords * 2;
8642 // Check that the shuffle is on half-words.
8643 if (!isNByteElemShuffleMask(N, 2, 1))
8644 return SDValue();
8645
8646 bool IsLE = Subtarget.isLittleEndian();
8647 SDLoc dl(N);
8648 SDValue V1 = N->getOperand(0);
8649 SDValue V2 = N->getOperand(1);
8650 unsigned ShiftElts = 0, InsertAtByte = 0;
8651 bool Swap = false;
8652
8653 // Shifts required to get the half-word we want at element 3.
8654 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
8655 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
8656
8657 uint32_t Mask = 0;
8658 uint32_t OriginalOrderLow = 0x1234567;
8659 uint32_t OriginalOrderHigh = 0x89ABCDEF;
8660 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
8661 // 32-bit space, only need 4-bit nibbles per element.
8662 for (unsigned i = 0; i < NumHalfWords; ++i) {
8663 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
8664 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
8665 }
8666
8667 // For each mask element, find out if we're just inserting something
8668 // from V2 into V1 or vice versa. Possible permutations inserting an element
8669 // from V2 into V1:
8670 // X, 1, 2, 3, 4, 5, 6, 7
8671 // 0, X, 2, 3, 4, 5, 6, 7
8672 // 0, 1, X, 3, 4, 5, 6, 7
8673 // 0, 1, 2, X, 4, 5, 6, 7
8674 // 0, 1, 2, 3, X, 5, 6, 7
8675 // 0, 1, 2, 3, 4, X, 6, 7
8676 // 0, 1, 2, 3, 4, 5, X, 7
8677 // 0, 1, 2, 3, 4, 5, 6, X
8678 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
8679
8680 bool FoundCandidate = false;
8681 // Go through the mask of half-words to find an element that's being moved
8682 // from one vector to the other.
8683 for (unsigned i = 0; i < NumHalfWords; ++i) {
8684 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
8685 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
8686 uint32_t MaskOtherElts = ~(0xF << MaskShift);
8687 uint32_t TargetOrder = 0x0;
8688
8689 // If both vector operands for the shuffle are the same vector, the mask
8690 // will contain only elements from the first one and the second one will be
8691 // undef.
8692 if (V2.isUndef()) {
8693 ShiftElts = 0;
8694 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
8695 TargetOrder = OriginalOrderLow;
8696 Swap = false;
8697 // Skip if not the correct element or mask of other elements don't equal
8698 // to our expected order.
8699 if (MaskOneElt == VINSERTHSrcElem &&
8700 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
8701 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
8702 FoundCandidate = true;
8703 break;
8704 }
8705 } else { // If both operands are defined.
8706 // Target order is [8,15] if the current mask is between [0,7].
8707 TargetOrder =
8708 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
8709 // Skip if mask of other elements don't equal our expected order.
8710 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
8711 // We only need the last 3 bits for the number of shifts.
8712 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
8713 : BigEndianShifts[MaskOneElt & 0x7];
8714 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
8715 Swap = MaskOneElt < NumHalfWords;
8716 FoundCandidate = true;
8717 break;
8718 }
8719 }
8720 }
8721
8722 if (!FoundCandidate)
8723 return SDValue();
8724
8725 // Candidate found, construct the proper SDAG sequence with VINSERTH,
8726 // optionally with VECSHL if shift is required.
8727 if (Swap)
8728 std::swap(V1, V2);
8729 if (V2.isUndef())
8730 V2 = V1;
8731 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
8732 if (ShiftElts) {
8733 // Double ShiftElts because we're left shifting on v16i8 type.
8734 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
8735 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
8736 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
8737 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
8738 DAG.getConstant(InsertAtByte, dl, MVT::i32));
8739 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
8740 }
8741 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
8742 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
8743 DAG.getConstant(InsertAtByte, dl, MVT::i32));
8744 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
8745}
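// Illustrative sketch (not part of the original file): the nibble packing
// performed in lowerToVINSERTH above. This hypothetical helper takes
// half-word indices that have already been divided by 2 (an assumption for
// brevity) and packs them big-end-first, so the identity order
// {0,1,2,3,4,5,6,7} packs to 0x01234567 -- exactly OriginalOrderLow.
static inline uint32_t packHalfWordMask(const int HalfWordElts[8]) {
  uint32_t Mask = 0;
  for (unsigned i = 0; i < 8; ++i)
    Mask |= uint32_t(HalfWordElts[i] & 0xF) << ((8 - 1 - i) * 4);
  return Mask;
}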
8746
8747/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
8748/// is a shuffle we can handle in a single instruction, return it. Otherwise,
8749/// return the code it can be lowered into. Worst case, it can always be
8750/// lowered into a vperm.
8751SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
8752 SelectionDAG &DAG) const {
8753 SDLoc dl(Op);
8754 SDValue V1 = Op.getOperand(0);
8755 SDValue V2 = Op.getOperand(1);
8756 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
8757 EVT VT = Op.getValueType();
8758 bool isLittleEndian = Subtarget.isLittleEndian();
8759
8760 unsigned ShiftElts, InsertAtByte;
8761 bool Swap = false;
8762 if (Subtarget.hasP9Vector() &&
8763 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
8764 isLittleEndian)) {
8765 if (Swap)
8766 std::swap(V1, V2);
8767 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
8768 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
8769 if (ShiftElts) {
8770 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
8771 DAG.getConstant(ShiftElts, dl, MVT::i32));
8772 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
8773 DAG.getConstant(InsertAtByte, dl, MVT::i32));
8774 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
8775 }
8776 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
8777 DAG.getConstant(InsertAtByte, dl, MVT::i32));
8778 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
8779 }
8780
8781 if (Subtarget.hasP9Altivec()) {
8782 SDValue NewISDNode;
8783 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
8784 return NewISDNode;
8785
8786 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
8787 return NewISDNode;
8788 }
8789
8790 if (Subtarget.hasVSX() &&
8791 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
8792 if (Swap)
8793 std::swap(V1, V2);
8794 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
8795 SDValue Conv2 =
8796 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
8797
8798 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
8799 DAG.getConstant(ShiftElts, dl, MVT::i32));
8800 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
8801 }
8802
8803 if (Subtarget.hasVSX() &&
8804 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
8805 if (Swap)
8806 std::swap(V1, V2);
8807 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
8808 SDValue Conv2 =
8809 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
8810
8811 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
8812 DAG.getConstant(ShiftElts, dl, MVT::i32));
8813 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
8814 }
8815
8816 if (Subtarget.hasP9Vector()) {
8817 if (PPC::isXXBRHShuffleMask(SVOp)) {
8818 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
8819 SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv);
8820 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
8821 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
8822 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
8823 SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv);
8824 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
8825 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
8826 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
8827 SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv);
8828 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
8829 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
8830 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
8831 SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv);
8832 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
8833 }
8834 }
8835
8836 if (Subtarget.hasVSX()) {
8837 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
8838 int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
8839
8840 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
8841 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
8842 DAG.getConstant(SplatIdx, dl, MVT::i32));
8843 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
8844 }
8845
8846 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
8847 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
8848 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
8849 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
8850 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
8851 }
8852 }
8853
8854 if (Subtarget.hasQPX()) {
8855 if (VT.getVectorNumElements() != 4)
8856 return SDValue();
8857
8858 if (V2.isUndef()) V2 = V1;
8859
8860 int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
8861 if (AlignIdx != -1) {
8862 return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
8863 DAG.getConstant(AlignIdx, dl, MVT::i32));
8864 } else if (SVOp->isSplat()) {
8865 int SplatIdx = SVOp->getSplatIndex();
8866 if (SplatIdx >= 4) {
8867 std::swap(V1, V2);
8868 SplatIdx -= 4;
8869 }
8870
8871 return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
8872 DAG.getConstant(SplatIdx, dl, MVT::i32));
8873 }
8874
8875 // Lower this into a qvgpci/qvfperm pair.
8876
8877 // Compute the qvgpci literal
8878 unsigned idx = 0;
8879 for (unsigned i = 0; i < 4; ++i) {
8880 int m = SVOp->getMaskElt(i);
8881 unsigned mm = m >= 0 ? (unsigned) m : i;
8882 idx |= mm << (3-i)*3;
8883 }
8884
8885 SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
8886 DAG.getConstant(idx, dl, MVT::i32));
8887 return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
8888 }
8889
8890 // Cases that are handled by instructions that take permute immediates
8891 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
8892 // selected by the instruction selector.
8893 if (V2.isUndef()) {
8894 if (PPC::isSplatShuffleMask(SVOp, 1) ||
8895 PPC::isSplatShuffleMask(SVOp, 2) ||
8896 PPC::isSplatShuffleMask(SVOp, 4) ||
8897 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
8898 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
8899 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
8900 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
8901 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
8902 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
8903 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
8904 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
8905 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
8906 (Subtarget.hasP8Altivec() && (
8907 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
8908 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
8909 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
8910 return Op;
8911 }
8912 }
8913
8914 // Altivec has a variety of "shuffle immediates" that take two vector inputs
8915 // and produce a fixed permutation. If any of these match, do not lower to
8916 // VPERM.
8917 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
8918 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
8919 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
8920 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
8921 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
8922 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
8923 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
8924 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
8925 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
8926 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
8927 (Subtarget.hasP8Altivec() && (
8928 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
8929 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
8930 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
8931 return Op;
8932
8933 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
8934 // perfect shuffle table to emit an optimal matching sequence.
8935 ArrayRef<int> PermMask = SVOp->getMask();
8936
8937 unsigned PFIndexes[4];
8938 bool isFourElementShuffle = true;
8939 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
8940 unsigned EltNo = 8; // Start out undef.
8941 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
8942 if (PermMask[i*4+j] < 0)
8943 continue; // Undef, ignore it.
8944
8945 unsigned ByteSource = PermMask[i*4+j];
8946 if ((ByteSource & 3) != j) {
8947 isFourElementShuffle = false;
8948 break;
8949 }
8950
8951 if (EltNo == 8) {
8952 EltNo = ByteSource/4;
8953 } else if (EltNo != ByteSource/4) {
8954 isFourElementShuffle = false;
8955 break;
8956 }
8957 }
8958 PFIndexes[i] = EltNo;
8959 }
8960
8961 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
8962 // perfect shuffle vector to determine if it is cost effective to do this as
8963 // discrete instructions, or whether we should use a vperm.
8964 // For now, we skip this for little endian until such time as we have a
8965 // little-endian perfect shuffle table.
8966 if (isFourElementShuffle && !isLittleEndian) {
8967 // Compute the index in the perfect shuffle table.
8968 unsigned PFTableIndex =
8969 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8970
8971 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8972 unsigned Cost = (PFEntry >> 30);
8973
8974 // Determining when to avoid vperm is tricky. Many things affect the cost
8975 // of vperm, particularly how many times the perm mask needs to be computed.
8976 // For example, if the perm mask can be hoisted out of a loop or is already
8977 // used (perhaps because there are multiple permutes with the same shuffle
8978 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
8979 // the loop requires an extra register.
8980 //
8981 // As a compromise, we only emit discrete instructions if the shuffle can be
8982 // generated in 3 or fewer operations. When we have loop information
8983 // available, if this block is within a loop, we should avoid using vperm
8984 // for 3-operation perms and use a constant pool load instead.
8985 if (Cost < 3)
8986 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8987 }
8988
8989 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
8990 // vector that will get spilled to the constant pool.
8991 if (V2.isUndef()) V2 = V1;
8992
8993 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
8994 // that it is in input element units, not in bytes. Convert now.
8995
8996 // For little endian, the order of the input vectors is reversed, and
8997 // the permutation mask is complemented with respect to 31. This is
8998 // necessary to produce proper semantics with the big-endian-biased vperm
8999 // instruction.
9000 EVT EltVT = V1.getValueType().getVectorElementType();
9001 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
9002
9003 SmallVector<SDValue, 16> ResultMask;
9004 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
9005 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
9006
9007 for (unsigned j = 0; j != BytesPerElement; ++j)
9008 if (isLittleEndian)
9009 ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
9010 dl, MVT::i32));
9011 else
9012 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
9013 MVT::i32));
9014 }
9015
9016 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
9017 if (isLittleEndian)
9018 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
9019 V2, V1, VPermMask);
9020 else
9021 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
9022 V1, V2, VPermMask);
9023}
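// Illustrative sketch (not part of the original file): the byte-level
// vperm control vector built at the end of LowerVECTOR_SHUFFLE above. On
// little-endian targets the two inputs are swapped *and* each byte index
// is complemented with respect to 31; together these preserve the shuffle
// semantics under vperm's big-endian byte numbering. Helper name is
// hypothetical.
static inline void buildVPermControl(const int *PermMask, unsigned NumElts,
                                     unsigned BytesPerElement,
                                     bool IsLittleEndian, uint8_t Out[16]) {
  unsigned OutIdx = 0;
  for (unsigned i = 0; i < NumElts; ++i) {
    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
    for (unsigned j = 0; j != BytesPerElement; ++j)
      Out[OutIdx++] = IsLittleEndian ? 31 - (SrcElt * BytesPerElement + j)
                                     : SrcElt * BytesPerElement + j;
  }
}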
9024
9025/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
9026/// vector comparison. If it is, return true and fill in Opc/isDot with
9027/// information about the intrinsic.
9028static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
9029 bool &isDot, const PPCSubtarget &Subtarget) {
9030 unsigned IntrinsicID =
9031 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
9032 CompareOpc = -1;
9033 isDot = false;
9034 switch (IntrinsicID) {
9035 default:
9036 return false;
9037 // Comparison predicates.
9038 case Intrinsic::ppc_altivec_vcmpbfp_p:
9039 CompareOpc = 966;
9040 isDot = true;
9041 break;
9042 case Intrinsic::ppc_altivec_vcmpeqfp_p:
9043 CompareOpc = 198;
9044 isDot = true;
9045 break;
9046 case Intrinsic::ppc_altivec_vcmpequb_p:
9047 CompareOpc = 6;
9048 isDot = true;
9049 break;
9050 case Intrinsic::ppc_altivec_vcmpequh_p:
9051 CompareOpc = 70;
9052 isDot = true;
9053 break;
9054 case Intrinsic::ppc_altivec_vcmpequw_p:
9055 CompareOpc = 134;
9056 isDot = true;
9057 break;
9058 case Intrinsic::ppc_altivec_vcmpequd_p:
9059 if (Subtarget.hasP8Altivec()) {
9060 CompareOpc = 199;
9061 isDot = true;
9062 } else
9063 return false;
9064 break;
9065 case Intrinsic::ppc_altivec_vcmpneb_p:
9066 case Intrinsic::ppc_altivec_vcmpneh_p:
9067 case Intrinsic::ppc_altivec_vcmpnew_p:
9068 case Intrinsic::ppc_altivec_vcmpnezb_p:
9069 case Intrinsic::ppc_altivec_vcmpnezh_p:
9070 case Intrinsic::ppc_altivec_vcmpnezw_p:
9071 if (Subtarget.hasP9Altivec()) {
9072 switch (IntrinsicID) {
9073 default:
9074 llvm_unreachable("Unknown comparison intrinsic.");
9075 case Intrinsic::ppc_altivec_vcmpneb_p:
9076 CompareOpc = 7;
9077 break;
9078 case Intrinsic::ppc_altivec_vcmpneh_p:
9079 CompareOpc = 71;
9080 break;
9081 case Intrinsic::ppc_altivec_vcmpnew_p:
9082 CompareOpc = 135;
9083 break;
9084 case Intrinsic::ppc_altivec_vcmpnezb_p:
9085 CompareOpc = 263;
9086 break;
9087 case Intrinsic::ppc_altivec_vcmpnezh_p:
9088 CompareOpc = 327;
9089 break;
9090 case Intrinsic::ppc_altivec_vcmpnezw_p:
9091 CompareOpc = 391;
9092 break;
9093 }
9094 isDot = true;
9095 } else
9096 return false;
9097 break;
9098 case Intrinsic::ppc_altivec_vcmpgefp_p:
9099 CompareOpc = 454;
9100 isDot = true;
9101 break;
9102 case Intrinsic::ppc_altivec_vcmpgtfp_p:
9103 CompareOpc = 710;
9104 isDot = true;
9105 break;
9106 case Intrinsic::ppc_altivec_vcmpgtsb_p:
9107 CompareOpc = 774;
9108 isDot = true;
9109 break;
9110 case Intrinsic::ppc_altivec_vcmpgtsh_p:
9111 CompareOpc = 838;
9112 isDot = true;
9113 break;
9114 case Intrinsic::ppc_altivec_vcmpgtsw_p:
9115 CompareOpc = 902;
9116 isDot = true;
9117 break;
9118 case Intrinsic::ppc_altivec_vcmpgtsd_p:
9119 if (Subtarget.hasP8Altivec()) {
9120 CompareOpc = 967;
9121 isDot = true;
9122 } else
9123 return false;
9124 break;
9125 case Intrinsic::ppc_altivec_vcmpgtub_p:
9126 CompareOpc = 518;
9127 isDot = true;
9128 break;
9129 case Intrinsic::ppc_altivec_vcmpgtuh_p:
9130 CompareOpc = 582;
9131 isDot = true;
9132 break;
9133 case Intrinsic::ppc_altivec_vcmpgtuw_p:
9134 CompareOpc = 646;
9135 isDot = true;
9136 break;
9137 case Intrinsic::ppc_altivec_vcmpgtud_p:
9138 if (Subtarget.hasP8Altivec()) {
9139 CompareOpc = 711;
9140 isDot = true;
9141 } else
9142 return false;
9143 break;
9144
9145 // VSX predicate comparisons use the same infrastructure
9146 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
9147 case Intrinsic::ppc_vsx_xvcmpgedp_p:
9148 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
9149 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
9150 case Intrinsic::ppc_vsx_xvcmpgesp_p:
9151 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
9152 if (Subtarget.hasVSX()) {
9153 switch (IntrinsicID) {
9154 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
9155 CompareOpc = 99;
9156 break;
9157 case Intrinsic::ppc_vsx_xvcmpgedp_p:
9158 CompareOpc = 115;
9159 break;
9160 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
9161 CompareOpc = 107;
9162 break;
9163 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
9164 CompareOpc = 67;
9165 break;
9166 case Intrinsic::ppc_vsx_xvcmpgesp_p:
9167 CompareOpc = 83;
9168 break;
9169 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
9170 CompareOpc = 75;
9171 break;
9172 }
9173 isDot = true;
9174 } else
9175 return false;
9176 break;
9177
9178 // Normal Comparisons.
9179 case Intrinsic::ppc_altivec_vcmpbfp:
9180 CompareOpc = 966;
9181 break;
9182 case Intrinsic::ppc_altivec_vcmpeqfp:
9183 CompareOpc = 198;
9184 break;
9185 case Intrinsic::ppc_altivec_vcmpequb:
9186 CompareOpc = 6;
9187 break;
9188 case Intrinsic::ppc_altivec_vcmpequh:
9189 CompareOpc = 70;
9190 break;
9191 case Intrinsic::ppc_altivec_vcmpequw:
9192 CompareOpc = 134;
9193 break;
9194 case Intrinsic::ppc_altivec_vcmpequd:
9195 if (Subtarget.hasP8Altivec())
9196 CompareOpc = 199;
9197 else
9198 return false;
9199 break;
9200 case Intrinsic::ppc_altivec_vcmpneb:
9201 case Intrinsic::ppc_altivec_vcmpneh:
9202 case Intrinsic::ppc_altivec_vcmpnew:
9203 case Intrinsic::ppc_altivec_vcmpnezb:
9204 case Intrinsic::ppc_altivec_vcmpnezh:
9205 case Intrinsic::ppc_altivec_vcmpnezw:
9206 if (Subtarget.hasP9Altivec())
9207 switch (IntrinsicID) {
9208 default:
9209 llvm_unreachable("Unknown comparison intrinsic.");
9210 case Intrinsic::ppc_altivec_vcmpneb:
9211 CompareOpc = 7;
9212 break;
9213 case Intrinsic::ppc_altivec_vcmpneh:
9214 CompareOpc = 71;
9215 break;
9216 case Intrinsic::ppc_altivec_vcmpnew:
9217 CompareOpc = 135;
9218 break;
9219 case Intrinsic::ppc_altivec_vcmpnezb:
9220 CompareOpc = 263;
9221 break;
9222 case Intrinsic::ppc_altivec_vcmpnezh:
9223 CompareOpc = 327;
9224 break;
9225 case Intrinsic::ppc_altivec_vcmpnezw:
9226 CompareOpc = 391;
9227 break;
9228 }
9229 else
9230 return false;
9231 break;
9232 case Intrinsic::ppc_altivec_vcmpgefp:
9233 CompareOpc = 454;
9234 break;
9235 case Intrinsic::ppc_altivec_vcmpgtfp:
9236 CompareOpc = 710;
9237 break;
9238 case Intrinsic::ppc_altivec_vcmpgtsb:
9239 CompareOpc = 774;
9240 break;
9241 case Intrinsic::ppc_altivec_vcmpgtsh:
9242 CompareOpc = 838;
9243 break;
9244 case Intrinsic::ppc_altivec_vcmpgtsw:
9245 CompareOpc = 902;
9246 break;
9247 case Intrinsic::ppc_altivec_vcmpgtsd:
9248 if (Subtarget.hasP8Altivec())
9249 CompareOpc = 967;
9250 else
9251 return false;
9252 break;
9253 case Intrinsic::ppc_altivec_vcmpgtub:
9254 CompareOpc = 518;
9255 break;
9256 case Intrinsic::ppc_altivec_vcmpgtuh:
9257 CompareOpc = 582;
9258 break;
9259 case Intrinsic::ppc_altivec_vcmpgtuw:
9260 CompareOpc = 646;
9261 break;
9262 case Intrinsic::ppc_altivec_vcmpgtud:
9263 if (Subtarget.hasP8Altivec())
9264 CompareOpc = 711;
9265 else
9266 return false;
9267 break;
9268 }
9269 return true;
9270}
9271
9272/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
9273/// lower, do it, otherwise return null.
9274SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
9275 SelectionDAG &DAG) const {
9276 unsigned IntrinsicID =
9277 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
9278
9279 SDLoc dl(Op);
9280
9281 if (IntrinsicID == Intrinsic::thread_pointer) {
9282 // Reads the thread pointer register, used for __builtin_thread_pointer.
9283 if (Subtarget.isPPC64())
9284 return DAG.getRegister(PPC::X13, MVT::i64);
9285 return DAG.getRegister(PPC::R2, MVT::i32);
9286 }
9287
9288 // If this is a lowered altivec predicate compare, CompareOpc is set to the
9289 // opcode number of the comparison.
9290 int CompareOpc;
9291 bool isDot;
9292 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
9293 return SDValue(); // Don't custom lower most intrinsics.
9294
9295 // If this is a non-dot comparison, make the VCMP node and we are done.
9296 if (!isDot) {
9297 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
9298 Op.getOperand(1), Op.getOperand(2),
9299 DAG.getConstant(CompareOpc, dl, MVT::i32));
9300 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
9301 }
9302
9303 // Create the PPCISD altivec 'dot' comparison node.
9304 SDValue Ops[] = {
9305 Op.getOperand(2), // LHS
9306 Op.getOperand(3), // RHS
9307 DAG.getConstant(CompareOpc, dl, MVT::i32)
9308 };
9309 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
9310 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
9311
9312 // Now that we have the comparison, emit a copy from the CR to a GPR.
9313 // This is flagged to the above dot comparison.
9314 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
9315 DAG.getRegister(PPC::CR6, MVT::i32),
9316 CompNode.getValue(1));
9317
9318 // Unpack the result based on how the target uses it.
9319 unsigned BitNo; // Bit # of CR6.
9320 bool InvertBit; // Invert result?
9321 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
9322 default: // Can't happen, don't crash on invalid number though.
9323 case 0: // Return the value of the EQ bit of CR6.
9324 BitNo = 0; InvertBit = false;
9325 break;
9326 case 1: // Return the inverted value of the EQ bit of CR6.
9327 BitNo = 0; InvertBit = true;
9328 break;
9329 case 2: // Return the value of the LT bit of CR6.
9330 BitNo = 2; InvertBit = false;
9331 break;
9332 case 3: // Return the inverted value of the LT bit of CR6.
9333 BitNo = 2; InvertBit = true;
9334 break;
9335 }
9336
9337 // Shift the bit into the low position.
9338 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
9339 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
9340 // Isolate the bit.
9341 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
9342 DAG.getConstant(1, dl, MVT::i32));
9343
9344 // If we are supposed to, toggle the bit.
9345 if (InvertBit)
9346 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
9347 DAG.getConstant(1, dl, MVT::i32));
9348 return Flags;
9349}
9350
9351SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9352 SelectionDAG &DAG) const {
9353 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
9354 // the beginning of the argument list.
9355 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
9356 SDLoc DL(Op);
9357 switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
9358 case Intrinsic::ppc_cfence: {
9359 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
9360 assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
9361 return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
9362 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
9363 Op.getOperand(ArgStart + 1)),
9364 Op.getOperand(0)),
9365 0);
9366 }
9367 default:
9368 break;
9369 }
9370 return SDValue();
9371}
9372
9373SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const {
9374 // Check for a DIV with the same operands as this REM.
9375 for (auto UI : Op.getOperand(1)->uses()) {
9376 if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) ||
9377 (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV))
9378 if (UI->getOperand(0) == Op.getOperand(0) &&
9379 UI->getOperand(1) == Op.getOperand(1))
9380 return SDValue();
9381 }
9382 return Op;
9383}
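// Illustrative aside (plain C++, not part of the original file): deferring
// the REM when a matching DIV exists is safe because C++ guarantees the
// identity (a/b)*b + a%b == a, so the default expansion r = a - (a/b)*b
// can reuse the already-computed quotient instead of a second divide.
static_assert(-7 % 3 == -7 - (-7 / 3) * 3, "remainder identity");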
9384
9385// Lower scalar BSWAP64 to xxbrd.
9386SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
9387 SDLoc dl(Op);
9388 // MTVSRDD
9389 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
9390 Op.getOperand(0));
9391 // XXBRD
9392 Op = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Op);
9393 // MFVSRD
9394 int VectorIndex = 0;
9395 if (Subtarget.isLittleEndian())
9396 VectorIndex = 1;
9397 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
9398 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
9399 return Op;
9400}
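// Illustrative sketch (plain C++, not part of the original file): the
// MTVSRDD / XXBRD / MFVSRD sequence above computes a full 64-bit byte
// reversal, i.e. the same function as this reference loop.
static inline uint64_t bswap64Reference(uint64_t V) {
  uint64_t R = 0;
  for (int i = 0; i < 8; ++i)
    R |= ((V >> (8 * i)) & 0xFF) << (8 * (7 - i));
  return R; // bswap64Reference(0x0102030405060708) == 0x0807060504030201
}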
9401
9402// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
9403// compared to a value that is atomically loaded (atomic loads zero-extend).
9404SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
9405 SelectionDAG &DAG) const {
9406 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
9407 "Expecting an atomic compare-and-swap here.");
9408 SDLoc dl(Op);
9409 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
9410 EVT MemVT = AtomicNode->getMemoryVT();
9411 if (MemVT.getSizeInBits() >= 32)
9412 return Op;
9413
9414 SDValue CmpOp = Op.getOperand(2);
9415 // If this is already correctly zero-extended, leave it alone.
9416 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
9417 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
9418 return Op;
9419
9420 // Clear the high bits of the compare operand.
9421 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
9422 SDValue NewCmpOp =
9423 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
9424 DAG.getConstant(MaskVal, dl, MVT::i32));
9425
9426 // Replace the existing compare operand with the properly zero-extended one.
9427 SmallVector<SDValue, 4> Ops;
9428 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
9429 Ops.push_back(AtomicNode->getOperand(i));
9430 Ops[2] = NewCmpOp;
9431 MachineMemOperand *MMO = AtomicNode->getMemOperand();
9432 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
9433 auto NodeTy =
9434 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
9435 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
9436}
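// Illustrative aside (plain C++ checks, not part of the original file):
// the MaskVal computed above is the zero-extension mask for the memory
// width, so the AND leaves exactly the bits an atomic i8/i16 load yields.
static_assert(((1 << 8) - 1) == 0xFF, "i8 compare-operand mask");
static_assert(((1 << 16) - 1) == 0xFFFF, "i16 compare-operand mask");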
9437
9438SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
9439 SelectionDAG &DAG) const {
9440 SDLoc dl(Op);
9441 // Create a stack slot that is 16-byte aligned.
9442 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9443 int FrameIdx = MFI.CreateStackObject(16, 16, false);
9444 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9445 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9446
9447 // Store the input value into Value#0 of the stack slot.
9448 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
9449 MachinePointerInfo());
9450 // Load it out.
9451 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
9452}
9453
9454SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
9455 SelectionDAG &DAG) const {
9456 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
9457 "Should only be called for ISD::INSERT_VECTOR_ELT");
9458
9459 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
9460 // We have legal lowering for constant indices but not for variable ones.
9461 if (!C)
9462 return SDValue();
9463
9464 EVT VT = Op.getValueType();
9465 SDLoc dl(Op);
9466 SDValue V1 = Op.getOperand(0);
9467 SDValue V2 = Op.getOperand(1);
9468 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
9469 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
9470 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
9471 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
9472 unsigned InsertAtElement = C->getZExtValue();
9473 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
9474 if (Subtarget.isLittleEndian()) {
9475 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
9476 }
9477 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
9478 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9479 }
9480 return Op;
9481}
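// Illustrative aside (plain C++ checks, not part of the original file):
// the little-endian fixup above mirrors the byte offset across the 16-byte
// register, since VECINSERT byte numbering follows the big-endian layout.
static_assert((16 - 2) - 0 * 2 == 14, "v8i16 element 0 inserts at byte 14");
static_assert((16 - 1) - 5 * 1 == 10, "v16i8 element 5 inserts at byte 10");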
9482
9483SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
9484 SelectionDAG &DAG) const {
9485 SDLoc dl(Op);
9486 SDNode *N = Op.getNode();
9487
9488 assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
9489 "Unknown extract_vector_elt type");
9490
9491 SDValue Value = N->getOperand(0);
9492
9493 // The first part of this is like the store lowering except that we don't
9494 // need to track the chain.
9495
9496 // The values are now known to be -1 (false) or 1 (true). To convert this
9497 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
9498 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
9499 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
9500
9501 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
9502 // understand how to form the extending load.
9503 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
9504
9505 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
9506
9507 // Now convert to an integer and store.
9508 Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
9509 DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
9510 Value);
9511
9512 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9513 int FrameIdx = MFI.CreateStackObject(16, 16, false);
9514 MachinePointerInfo PtrInfo =
9515 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9516 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9517 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9518
9519 SDValue StoreChain = DAG.getEntryNode();
9520 SDValue Ops[] = {StoreChain,
9521 DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
9522 Value, FIdx};
9523 SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
9524
9525 StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
9526 dl, VTs, Ops, MVT::v4i32, PtrInfo);
9527
9528 // Extract the value requested.
9529 unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
9530 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
9531 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
9532
9533 SDValue IntVal =
9534 DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset));
9535
9536 if (!Subtarget.useCRBits())
9537 return IntVal;
9538
9539 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
9540}
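// Illustrative aside (plain C++ checks, not part of the original file):
// the fma above applies the affine map 0.5*V + 0.5, which sends the QPX
// boolean encoding -1.0 (false) / 1.0 (true) exactly to 0.0 / 1.0.
static_assert(0.5 * -1.0 + 0.5 == 0.0, "false lane maps to 0.0");
static_assert(0.5 * 1.0 + 0.5 == 1.0, "true lane maps to 1.0");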
9541
9542/// Lowering for QPX v4i1 loads
9543SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
9544 SelectionDAG &DAG) const {
9545 SDLoc dl(Op);
9546 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
9547 SDValue LoadChain = LN->getChain();
9548 SDValue BasePtr = LN->getBasePtr();
9549
9550 if (Op.getValueType() == MVT::v4f64 ||
9551 Op.getValueType() == MVT::v4f32) {
9552 EVT MemVT = LN->getMemoryVT();
9553 unsigned Alignment = LN->getAlignment();
9554
9555 // If this load is properly aligned, then it is legal.
9556 if (Alignment >= MemVT.getStoreSize())
9557 return Op;
9558
9559 EVT ScalarVT = Op.getValueType().getScalarType(),
9560 ScalarMemVT = MemVT.getScalarType();
9561 unsigned Stride = ScalarMemVT.getStoreSize();
9562
9563 SDValue Vals[4], LoadChains[4];
9564 for (unsigned Idx = 0; Idx < 4; ++Idx) {
9565 SDValue Load;
9566 if (ScalarVT != ScalarMemVT)
9567 Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
9568 BasePtr,
9569 LN->getPointerInfo().getWithOffset(Idx * Stride),
9570 ScalarMemVT, MinAlign(Alignment, Idx * Stride),
9571 LN->getMemOperand()->getFlags(), LN->getAAInfo());
9572 else
9573 Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
9574 LN->getPointerInfo().getWithOffset(Idx * Stride),
9575 MinAlign(Alignment, Idx * Stride),
9576 LN->getMemOperand()->getFlags(), LN->getAAInfo());
9577
9578 if (Idx == 0 && LN->isIndexed()) {
9579 assert(LN->getAddressingMode() == ISD::PRE_INC &&
9580 "Unknown addressing mode on vector load");
9581 Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
9582 LN->getAddressingMode());
9583 }
9584
9585 Vals[Idx] = Load;
9586 LoadChains[Idx] = Load.getValue(1);
9587
9588 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
9589 DAG.getConstant(Stride, dl,
9590 BasePtr.getValueType()));
9591 }
9592
9593 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
9594 SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals);
9595
9596 if (LN->isIndexed()) {
9597 SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
9598 return DAG.getMergeValues(RetOps, dl);
9599 }
9600
9601 SDValue RetOps[] = { Value, TF };
9602 return DAG.getMergeValues(RetOps, dl);
9603 }
9604
9605 assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
9606 assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");
9607
9608 // To lower v4i1 from a byte array, we load the byte elements of the
9609 // vector and then reuse the BUILD_VECTOR logic.
9610
9611 SDValue VectElmts[4], VectElmtChains[4];
9612 for (unsigned i = 0; i < 4; ++i) {
9613 SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
9614 Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
9615
9616 VectElmts[i] = DAG.getExtLoad(
9617 ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx,
9618 LN->getPointerInfo().getWithOffset(i), MVT::i8,
9619 /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo());
9620 VectElmtChains[i] = VectElmts[i].getValue(1);
9621 }
9622
9623 LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
9624 SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts);
9625
9626 SDValue RVals[] = { Value, LoadChain };
9627 return DAG.getMergeValues(RVals, dl);
9628}
9629
9630/// Lowering for QPX v4i1 stores
9631SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
9632 SelectionDAG &DAG) const {
9633 SDLoc dl(Op);
9634 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
9635 SDValue StoreChain = SN->getChain();
9636 SDValue BasePtr = SN->getBasePtr();
9637 SDValue Value = SN->getValue();
9638
9639 if (Value.getValueType() == MVT::v4f64 ||
9640 Value.getValueType() == MVT::v4f32) {
9641 EVT MemVT = SN->getMemoryVT();
9642 unsigned Alignment = SN->getAlignment();
9643
9644 // If this store is properly aligned, then it is legal.
9645 if (Alignment >= MemVT.getStoreSize())
9646 return Op;
9647
9648 EVT ScalarVT = Value.getValueType().getScalarType(),
9649 ScalarMemVT = MemVT.getScalarType();
9650 unsigned Stride = ScalarMemVT.getStoreSize();
9651
9652 SDValue Stores[4];
9653 for (unsigned Idx = 0; Idx < 4; ++Idx) {
9654 SDValue Ex = DAG.getNode(
9655 ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
9656 DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout())));
9657 SDValue Store;
9658 if (ScalarVT != ScalarMemVT)
9659 Store =
9660 DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
9661 SN->getPointerInfo().getWithOffset(Idx * Stride),
9662 ScalarMemVT, MinAlign(Alignment, Idx * Stride),
9663 SN->getMemOperand()->getFlags(), SN->getAAInfo());
9664 else
9665 Store = DAG.getStore(StoreChain, dl, Ex, BasePtr,
9666 SN->getPointerInfo().getWithOffset(Idx * Stride),
9667 MinAlign(Alignment, Idx * Stride),
9668 SN->getMemOperand()->getFlags(), SN->getAAInfo());
9669
9670 if (Idx == 0 && SN->isIndexed()) {
9671 assert(SN->getAddressingMode() == ISD::PRE_INC &&
9672 "Unknown addressing mode on vector store");
9673 Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
9674 SN->getAddressingMode());
9675 }
9676
9677 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
9678 DAG.getConstant(Stride, dl,
9679 BasePtr.getValueType()));
9680 Stores[Idx] = Store;
9681 }
9682
9683 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9684
9685 if (SN->isIndexed()) {
9686 SDValue RetOps[] = { TF, Stores[0].getValue(1) };
9687 return DAG.getMergeValues(RetOps, dl);
9688 }
9689
9690 return TF;
9691 }
9692
9693 assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
9694 assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");
9695
9696 // The values are now known to be -1 (false) or 1 (true). To convert this
9697 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
9698 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
9699 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
9700
9701 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
9702 // understand how to form the extending load.
9703 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
9704
9705 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
9706
9707 // Now convert to an integer and store.
9708 Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
9709 DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
9710 Value);
9711
9712 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9713 int FrameIdx = MFI.CreateStackObject(16, 16, false);
9714 MachinePointerInfo PtrInfo =
9715 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9716 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9717 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9718
9719 SDValue Ops[] = {StoreChain,
9720 DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
9721 Value, FIdx};
9722 SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
9723
9724 StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
9725 dl, VTs, Ops, MVT::v4i32, PtrInfo);
9726
9727 // Move data into the byte array.
9728 SDValue Loads[4], LoadChains[4];
9729 for (unsigned i = 0; i < 4; ++i) {
9730 unsigned Offset = 4*i;
9731 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
9732 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
9733
9734 Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
9735 PtrInfo.getWithOffset(Offset));
9736 LoadChains[i] = Loads[i].getValue(1);
9737 }
9738
9739 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
9740
9741 SDValue Stores[4];
9742 for (unsigned i = 0; i < 4; ++i) {
9743 SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
9744 Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
9745
9746 Stores[i] = DAG.getTruncStore(
9747 StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
9748 MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(),
9749 SN->getAAInfo());
9750 }
9751
9752 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9753
9754 return StoreChain;
9755}
9756
9757SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
9758 SDLoc dl(Op);
9759 if (Op.getValueType() == MVT::v4i32) {
9760 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
9761
9762 SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl);
9763 SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl); // +16 as shift amt.
9764
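 // Editorial sketch of the decomposition the sequence below relies on:
 // writing x = xh*2^16 + xl and y = yh*2^16 + yl, the low 32 bits of x*y
 // are xl*yl + ((xl*yh + xh*yl) << 16). vmulouh supplies the xl*yl terms,
 // and vmsumuhm against the halfword-rotated RHS accumulates
 // xl*yh + xh*yl, which is then shifted up by 16 and added in.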
9765 SDValue RHSSwap = // = vrlw RHS, 16
9766 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
9767
9768 // Shrinkify inputs to v8i16.
9769 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
9770 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
9771 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
9772
9773 // Low parts multiplied together, generating 32-bit results (we ignore the
9774 // top parts).
9775 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
9776 LHS, RHS, DAG, dl, MVT::v4i32);
9777
9778 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
9779 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
9780 // Shift the high parts up 16 bits.
9781 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
9782 Neg16, DAG, dl);
9783 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
9784 } else if (Op.getValueType() == MVT::v8i16) {
9785 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
9786
9787 SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
9788
9789 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
9790 LHS, RHS, Zero, DAG, dl);
9791 } else if (Op.getValueType() == MVT::v16i8) {
9792 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
9793 bool isLittleEndian = Subtarget.isLittleEndian();
9794
9795 // Multiply the even 8-bit parts, producing 16-bit sums.
9796 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
9797 LHS, RHS, DAG, dl, MVT::v8i16);
9798 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
9799
9800 // Multiply the odd 8-bit parts, producing 16-bit sums.
9801 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
9802 LHS, RHS, DAG, dl, MVT::v8i16);
9803 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
9804
9805 // Merge the results together. Because vmuleub and vmuloub are
9806 // instructions with a big-endian bias, we must reverse the
9807 // element numbering and reverse the meaning of "odd" and "even"
9808 // when generating little endian code.
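 // Editorial note: each 16-bit product keeps its 8-bit result in the low
 // byte. Viewed as v16i8, that byte sits at index 2*i+1 in big-endian
 // element order and at 2*i in little-endian order, which is exactly what
 // the shuffle mask built below selects from the two product vectors.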
9809 int Ops[16];
9810 for (unsigned i = 0; i != 8; ++i) {
9811 if (isLittleEndian) {
9812 Ops[i*2 ] = 2*i;
9813 Ops[i*2+1] = 2*i+16;
9814 } else {
9815 Ops[i*2 ] = 2*i+1;
9816 Ops[i*2+1] = 2*i+1+16;
9817 }
9818 }
9819 if (isLittleEndian)
9820 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
9821 else
9822 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
9823 } else {
9824 llvm_unreachable("Unknown mul to lower!");
9825 }
9826}
9827
9828SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
9829
9830 assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");
9831
9832 EVT VT = Op.getValueType();
9833 assert(VT.isVector() &&
9834 "Only set vector abs as custom, scalar abs shouldn't reach here!");
9835 assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
9836 VT == MVT::v16i8) &&
9837 "Unexpected vector element type!");
9838 assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
9839 "Current subtarget doesn't support smax v2i64!");
9840
9841 // For vector abs, it can be lowered to:
9842 // abs x
9843 // ==>
9844 // y = -x
9845 // smax(x, y)
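 // Editorial example: a lane holding -5 yields y = 0 - (-5) = 5 and
 // smax(-5, 5) = 5; a lane holding 5 yields smax(5, -5) = 5. (INT_MIN
 // negates to itself, matching the usual two's-complement abs behavior.)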
9846
9847 SDLoc dl(Op);
9848 SDValue X = Op.getOperand(0);
9849 SDValue Zero = DAG.getConstant(0, dl, VT);
9850 SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);
9851
9852 // SMAX patch https://reviews.llvm.org/D47332
9853 // hasn't landed yet, so use intrinsic first here.
9854 // TODO: Should use SMAX directly once SMAX patch landed
9855 Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
9856 if (VT == MVT::v2i64)
9857 BifID = Intrinsic::ppc_altivec_vmaxsd;
9858 else if (VT == MVT::v8i16)
9859 BifID = Intrinsic::ppc_altivec_vmaxsh;
9860 else if (VT == MVT::v16i8)
9861 BifID = Intrinsic::ppc_altivec_vmaxsb;
9862
9863 return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
9864}
9865
9866 // Custom lowering for fpext v2f32 to v2f64
9867SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
9868
9869 assert(Op.getOpcode() == ISD::FP_EXTEND &&
9870 "Should only be called for ISD::FP_EXTEND");
9871
9872 // We only want to custom lower an extend from v2f32 to v2f64.
9873 if (Op.getValueType() != MVT::v2f64 ||
9874 Op.getOperand(0).getValueType() != MVT::v2f32)
9875 return SDValue();
9876
9877 SDLoc dl(Op);
9878 SDValue Op0 = Op.getOperand(0);
9879
9880 switch (Op0.getOpcode()) {
9881 default:
9882 return SDValue();
9883 case ISD::FADD:
9884 case ISD::FMUL:
9885 case ISD::FSUB: {
9886 SDValue NewLoad[2];
9887 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
9888 // Ensure both inputs are loads.
9889 SDValue LdOp = Op0.getOperand(i);
9890 if (LdOp.getOpcode() != ISD::LOAD)
9891 return SDValue();
9892 // Generate new load node.
9893 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
9894 SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() };
9895 NewLoad[i] =
9896 DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl,
9897 DAG.getVTList(MVT::v4f32, MVT::Other),
9898 LoadOps, LD->getMemoryVT(),
9899 LD->getMemOperand());
9900 }
9901 SDValue NewOp = DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32,
9902 NewLoad[0], NewLoad[1],
9903 Op0.getNode()->getFlags());
9904 return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewOp);
9905 }
9906 case ISD::LOAD: {
9907 LoadSDNode *LD = cast<LoadSDNode>(Op0);
9908 SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() };
9909 SDValue NewLd =
9910 DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl,
9911 DAG.getVTList(MVT::v4f32, MVT::Other),
9912 LoadOps, LD->getMemoryVT(), LD->getMemOperand());
9913 return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewLd);
9914 }
9915 }
9916 llvm_unreachable("ERROR: Should return for all cases within switch.");
9917}
9918
9919/// LowerOperation - Provide custom lowering hooks for some operations.
9920///
9921 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
9922 switch (Op.getOpcode()) {
9923 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
9924 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
9925 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
9926 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
9927 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
9928 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
9929 case ISD::SETCC: return LowerSETCC(Op, DAG);
9930 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
9931 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
9932
9933 // Variable argument lowering.
9934 case ISD::VASTART: return LowerVASTART(Op, DAG);
9935 case ISD::VAARG: return LowerVAARG(Op, DAG);
9936 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
9937
9938 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
9939 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
9940 case ISD::GET_DYNAMIC_AREA_OFFSET:
9941 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
9942
9943 // Exception handling lowering.
9944 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
9945 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
9946 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
9947
9948 case ISD::LOAD: return LowerLOAD(Op, DAG);
9949 case ISD::STORE: return LowerSTORE(Op, DAG);
9950 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
9951 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
9952 case ISD::FP_TO_UINT:
9953 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
9954 case ISD::UINT_TO_FP:
9955 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
9956 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
9957
9958 // Lower 64-bit shifts.
9959 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
9960 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
9961 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
9962
9963 // Vector-related lowering.
9964 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
9965 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
9966 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
9967 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
9968 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
9969 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
9970 case ISD::MUL: return LowerMUL(Op, DAG);
9971 case ISD::ABS: return LowerABS(Op, DAG);
9972 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
9973
9974 // For counter-based loop handling.
9975 case ISD::INTRINSIC_W_CHAIN: return SDValue();
9976
9977 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
9978
9979 // Frame & Return address.
9980 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
9981 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
9982
9983 case ISD::INTRINSIC_VOID:
9984 return LowerINTRINSIC_VOID(Op, DAG);
9985 case ISD::SREM:
9986 case ISD::UREM:
9987 return LowerREM(Op, DAG);
9988 case ISD::BSWAP:
9989 return LowerBSWAP(Op, DAG);
9990 case ISD::ATOMIC_CMP_SWAP:
9991 return LowerATOMIC_CMP_SWAP(Op, DAG);
9992 }
9993}
9994
9995 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
9996 SmallVectorImpl<SDValue> &Results,
9997 SelectionDAG &DAG) const {
9998 SDLoc dl(N);
9999 switch (N->getOpcode()) {
10000 default:
10001 llvm_unreachable("Do not know how to custom type legalize this operation!");
10002 case ISD::READCYCLECOUNTER: {
10003 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
10004 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
10005
10006 Results.push_back(RTB);
10007 Results.push_back(RTB.getValue(1));
10008 Results.push_back(RTB.getValue(2));
10009 break;
10010 }
10011 case ISD::INTRINSIC_W_CHAIN: {
10012 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
10013 Intrinsic::loop_decrement)
10014 break;
10015
10016 assert(N->getValueType(0) == MVT::i1 &&
10017 "Unexpected result type for CTR decrement intrinsic");
10018 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
10019 N->getValueType(0));
10020 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
10021 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
10022 N->getOperand(1));
10023
10024 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
10025 Results.push_back(NewInt.getValue(1));
10026 break;
10027 }
10028 case ISD::VAARG: {
10029 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
10030 return;
10031
10032 EVT VT = N->getValueType(0);
10033
10034 if (VT == MVT::i64) {
10035 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
10036
10037 Results.push_back(NewNode);
10038 Results.push_back(NewNode.getValue(1));
10039 }
10040 return;
10041 }
10042 case ISD::FP_TO_SINT:
10043 case ISD::FP_TO_UINT:
10044 // LowerFP_TO_INT() can only handle f32 and f64.
10045 if (N->getOperand(0).getValueType() == MVT::ppcf128)
10046 return;
10047 Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
10048 return;
10049 case ISD::TRUNCATE: {
10050 EVT TrgVT = N->getValueType(0);
10051 if (TrgVT.isVector() &&
10052 isOperationCustom(N->getOpcode(), TrgVT) &&
10053 N->getOperand(0).getValueType().getSizeInBits() <= 128)
10054 Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG));
10055 return;
10056 }
10057 case ISD::BITCAST:
10058 // Don't handle bitcast here.
10059 return;
10060 }
10061}
10062
10063//===----------------------------------------------------------------------===//
10064// Other Lowering Code
10065//===----------------------------------------------------------------------===//
10066
10067 static Instruction *callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
10068 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
10069 Function *Func = Intrinsic::getDeclaration(M, Id);
10070 return Builder.CreateCall(Func, {});
10071}
10072
10073 // The mappings for emitLeading/TrailingFence are taken from
10074// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
10075 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
10076 Instruction *Inst,
10077 AtomicOrdering Ord) const {
10078 if (Ord == AtomicOrdering::SequentiallyConsistent)
10079 return callIntrinsic(Builder, Intrinsic::ppc_sync);
10080 if (isReleaseOrStronger(Ord))
10081 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
10082 return nullptr;
10083}
10084
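 // Editorial summary of the mapping implemented by these two hooks:
 // seq_cst operations get a full sync before them, release-or-stronger
 // operations get an lwsync before them, and acquire-or-stronger
 // operations get an lwsync after them -- except acquire-or-stronger
 // loads on 64-bit subtargets, which use the cfence sequence instead.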
10085 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
10086 Instruction *Inst,
10087 AtomicOrdering Ord) const {
10088 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
10089 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
10090 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
10091 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
10092 if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
10093 return Builder.CreateCall(
10094 Intrinsic::getDeclaration(
10095 Builder.GetInsertBlock()->getParent()->getParent(),
10096 Intrinsic::ppc_cfence, {Inst->getType()}),
10097 {Inst});
10098 // FIXME: Can use isync for rmw operation.
10099 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
10100 }
10101 return nullptr;
10102}
10103
10104 MachineBasicBlock *
10105 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
10106 unsigned AtomicSize,
10107 unsigned BinOpcode,
10108 unsigned CmpOpcode,
10109 unsigned CmpPred) const {
10110 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
10111 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10112
10113 auto LoadMnemonic = PPC::LDARX;
10114 auto StoreMnemonic = PPC::STDCX;
10115 switch (AtomicSize) {
10116 default:
10117 llvm_unreachable("Unexpected size of atomic entity");
10118 case 1:
10119 LoadMnemonic = PPC::LBARX;
10120 StoreMnemonic = PPC::STBCX;
10121 assert(Subtarget.hasPartwordAtomics() && "Partword atomics are required for sizes < 4");
10122 break;
10123 case 2:
10124 LoadMnemonic = PPC::LHARX;
10125 StoreMnemonic = PPC::STHCX;
10126 assert(Subtarget.hasPartwordAtomics() && "Partword atomics are required for sizes < 4");
10127 break;
10128 case 4:
10129 LoadMnemonic = PPC::LWARX;
10130 StoreMnemonic = PPC::STWCX;
10131 break;
10132 case 8:
10133 LoadMnemonic = PPC::LDARX;
10134 StoreMnemonic = PPC::STDCX;
10135 break;
10136 }
10137
10138 const BasicBlock *LLVM_BB = BB->getBasicBlock();
10139 MachineFunction *F = BB->getParent();
10140 MachineFunction::iterator It = ++BB->getIterator();
10141
10142 Register dest = MI.getOperand(0).getReg();
10143 Register ptrA = MI.getOperand(1).getReg();
10144 Register ptrB = MI.getOperand(2).getReg();
10145 Register incr = MI.getOperand(3).getReg();
10146 DebugLoc dl = MI.getDebugLoc();
10147
10148 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
10149 MachineBasicBlock *loop2MBB =
10150 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
10151 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10152 F->insert(It, loopMBB);
10153 if (CmpOpcode)
10154 F->insert(It, loop2MBB);
10155 F->insert(It, exitMBB);
10156 exitMBB->splice(exitMBB->begin(), BB,
10157 std::next(MachineBasicBlock::iterator(MI)), BB->end());
10158 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10159
10160 MachineRegisterInfo &RegInfo = F->getRegInfo();
10161 Register TmpReg = (!BinOpcode) ? incr :
10162 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
10163 : &PPC::GPRCRegClass);
10164
10165 // thisMBB:
10166 // ...
10167 // fallthrough --> loopMBB
10168 BB->addSuccessor(loopMBB);
10169
10170 // loopMBB:
10171 // l[wd]arx dest, ptr
10172 // add r0, dest, incr
10173 // st[wd]cx. r0, ptr
10174 // bne- loopMBB
10175 // fallthrough --> exitMBB
10176
10177 // For max/min...
10178 // loopMBB:
10179 // l[wd]arx dest, ptr
10180 // cmpl?[wd] incr, dest
10181 // bgt exitMBB
10182 // loop2MBB:
10183 // st[wd]cx. incr, ptr
10184 // bne- loopMBB
10185 // fallthrough --> exitMBB
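 // Editorial note: in the min/max form BinOpcode is 0, so TmpReg aliases
 // incr; the st[wd]cx. therefore publishes the incoming value, and the
 // early branch to exitMBB covers the case where the old value in dest
 // should be kept instead.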
10186
10187 BB = loopMBB;
10188 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
10189 .addReg(ptrA).addReg(ptrB);
10190 if (BinOpcode)
10191 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
10192 if (CmpOpcode) {
10193 // Signed comparisons of byte or halfword values must be sign-extended.
10194 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
10195 unsigned ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
10196 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
10197 ExtReg).addReg(dest);
10198 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10199 .addReg(incr).addReg(ExtReg);
10200 } else
10201 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10202 .addReg(incr).addReg(dest);
10203
10204 BuildMI(BB, dl, TII->get(PPC::BCC))
10205 .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
10206 BB->addSuccessor(loop2MBB);
10207 BB->addSuccessor(exitMBB);
10208 BB = loop2MBB;
10209 }
10210 BuildMI(BB, dl, TII->get(StoreMnemonic))
10211 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
10212 BuildMI(BB, dl, TII->get(PPC::BCC))
10213 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
10214 BB->addSuccessor(loopMBB);
10215 BB->addSuccessor(exitMBB);
10216
10217 // exitMBB:
10218 // ...
10219 BB = exitMBB;
10220 return BB;
10221}
10222
10223 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
10224 MachineInstr &MI, MachineBasicBlock *BB,
10225 bool is8bit, // operation
10226 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
10227 // If we support part-word atomic mnemonics, just use them
10228 if (Subtarget.hasPartwordAtomics())
10229 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
10230 CmpPred);
10231
10232 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
10233 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10234 // In 64-bit mode we have to use 64-bit registers for addresses, even though
10235 // lwarx/stwcx. operate on 32-bit data. With the 32-bit atomics we can use address
10236 // registers without caring whether they're 32 or 64, but here we're
10237 // doing actual arithmetic on the addresses.
10238 bool is64bit = Subtarget.isPPC64();
10239 bool isLittleEndian = Subtarget.isLittleEndian();
10240 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
10241
10242 const BasicBlock *LLVM_BB = BB->getBasicBlock();
10243 MachineFunction *F = BB->getParent();
10244 MachineFunction::iterator It = ++BB->getIterator();
10245
10246 unsigned dest = MI.getOperand(0).getReg();
10247 unsigned ptrA = MI.getOperand(1).getReg();
10248 unsigned ptrB = MI.getOperand(2).getReg();
10249 unsigned incr = MI.getOperand(3).getReg();
10250 DebugLoc dl = MI.getDebugLoc();
10251
10252 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
10253 MachineBasicBlock *loop2MBB =
10254 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
10255 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10256 F->insert(It, loopMBB);
10257 if (CmpOpcode)
10258 F->insert(It, loop2MBB);
10259 F->insert(It, exitMBB);
10260 exitMBB->splice(exitMBB->begin(), BB,
10261 std::next(MachineBasicBlock::iterator(MI)), BB->end());
10262 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10263
10264 MachineRegisterInfo &RegInfo = F->getRegInfo();
10265 const TargetRegisterClass *RC =
10266 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
10267 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
10268
10269 Register PtrReg = RegInfo.createVirtualRegister(RC);
10270 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
10271 Register ShiftReg =
10272 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
10273 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
10274 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
10275 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
10276 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
10277 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
10278 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
10279 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
10280 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
10281 Register Ptr1Reg;
10282 Register TmpReg =
10283 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
10284
10285 // thisMBB:
10286 // ...
10287 // fallthrough --> loopMBB
10288 BB->addSuccessor(loopMBB);
10289
10290 // The 4-byte load must be aligned, while a char or short may be
10291 // anywhere in the word. Hence all this nasty bookkeeping code.
10292 // add ptr1, ptrA, ptrB [copy if ptrA==0]
10293 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
10294 // xori shift, shift1, 24 [16]
10295 // rlwinm ptr, ptr1, 0, 0, 29
10296 // slw incr2, incr, shift
10297 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
10298 // slw mask, mask2, shift
10299 // loopMBB:
10300 // lwarx tmpDest, ptr
10301 // add tmp, tmpDest, incr2
10302 // andc tmp2, tmpDest, mask
10303 // and tmp3, tmp, mask
10304 // or tmp4, tmp3, tmp2
10305 // stwcx. tmp4, ptr
10306 // bne- loopMBB
10307 // fallthrough --> exitMBB
10308 // srw dest, tmpDest, shift
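 // Editorial walk-through of the shift computation: for a byte at
 // byte-offset 3 within its aligned word, shift1 = 3*8 = 24. Big-endian
 // then applies xori 24, giving shift = 0 (the byte already occupies bits
 // 7:0), while little-endian keeps shift = 24 and slw moves the operand
 // and the 0xFF mask up into bits 31:24 of the word lwarx/stwcx. touch.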
10309 if (ptrA != ZeroReg) {
10310 Ptr1Reg = RegInfo.createVirtualRegister(RC);
10311 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
10312 .addReg(ptrA)
10313 .addReg(ptrB);
10314 } else {
10315 Ptr1Reg = ptrB;
10316 }
10317 // We need to use a 32-bit subregister here to avoid a register class
10318 // mismatch in 64-bit mode.
10319 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
10320 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
10321 .addImm(3)
10322 .addImm(27)
10323 .addImm(is8bit ? 28 : 27);
10324 if (!isLittleEndian)
10325 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
10326 .addReg(Shift1Reg)
10327 .addImm(is8bit ? 24 : 16);
10328 if (is64bit)
10329 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
10330 .addReg(Ptr1Reg)
10331 .addImm(0)
10332 .addImm(61);
10333 else
10334 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
10335 .addReg(Ptr1Reg)
10336 .addImm(0)
10337 .addImm(0)
10338 .addImm(29);
10339 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
10340 if (is8bit)
10341 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
10342 else {
10343 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
10344 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
10345 .addReg(Mask3Reg)
10346 .addImm(65535);
10347 }
10348 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
10349 .addReg(Mask2Reg)
10350 .addReg(ShiftReg);
10351
10352 BB = loopMBB;
10353 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
10354 .addReg(ZeroReg)
10355 .addReg(PtrReg);
10356 if (BinOpcode)
10357 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
10358 .addReg(Incr2Reg)
10359 .addReg(TmpDestReg);
10360 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
10361 .addReg(TmpDestReg)
10362 .addReg(MaskReg);
10363 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
10364 if (CmpOpcode) {
10365 // For unsigned comparisons, we can directly compare the shifted values.
10366 // For signed comparisons we shift and sign extend.
10367 unsigned SReg = RegInfo.createVirtualRegister(GPRC);
10368 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
10369 .addReg(TmpDestReg)
10370 .addReg(MaskReg);
10371 unsigned ValueReg = SReg;
10372 unsigned CmpReg = Incr2Reg;
10373 if (CmpOpcode == PPC::CMPW) {
10374 ValueReg = RegInfo.createVirtualRegister(GPRC);
10375 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
10376 .addReg(SReg)
10377 .addReg(ShiftReg);
10378 unsigned ValueSReg = RegInfo.createVirtualRegister(GPRC);
10379 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
10380 .addReg(ValueReg);
10381 ValueReg = ValueSReg;
10382 CmpReg = incr;
10383 }
10384 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10385 .addReg(CmpReg)
10386 .addReg(ValueReg);
10387 BuildMI(BB, dl, TII->get(PPC::BCC))
10388 .addImm(CmpPred)
10389 .addReg(PPC::CR0)
10390 .addMBB(exitMBB);
10391 BB->addSuccessor(loop2MBB);
10392 BB->addSuccessor(exitMBB);
10393 BB = loop2MBB;
10394 }
10395 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
10396 BuildMI(BB, dl, TII->get(PPC::STWCX))
10397 .addReg(Tmp4Reg)
10398 .addReg(ZeroReg)
10399 .addReg(PtrReg);
10400 BuildMI(BB, dl, TII->get(PPC::BCC))
10401 .addImm(PPC::PRED_NE)
10402 .addReg(PPC::CR0)
10403 .addMBB(loopMBB);
10404 BB->addSuccessor(loopMBB);
10405 BB->addSuccessor(exitMBB);
10406
10407 // exitMBB:
10408 // ...
10409 BB = exitMBB;
10410 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
10411 .addReg(TmpDestReg)
10412 .addReg(ShiftReg);
10413 return BB;
10414}
10415
10416 MachineBasicBlock *
10417 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
10418 MachineBasicBlock *MBB) const {
10419 DebugLoc DL = MI.getDebugLoc();
10420 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10421 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
10422
10423 MachineFunction *MF = MBB->getParent();
10424 MachineRegisterInfo &MRI = MF->getRegInfo();
10425
10426 const BasicBlock *BB = MBB->getBasicBlock();
10427 MachineFunction::iterator I = ++MBB->getIterator();
10428
10429 unsigned DstReg = MI.getOperand(0).getReg();
10430 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
10431 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
10432 unsigned mainDstReg = MRI.createVirtualRegister(RC);
10433 unsigned restoreDstReg = MRI.createVirtualRegister(RC);
10434
10435 MVT PVT = getPointerTy(MF->getDataLayout());
10436 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
10437 "Invalid Pointer Size!");
10438 // For v = setjmp(buf), we generate
10439 //
10440 // thisMBB:
10441 // SjLjSetup mainMBB
10442 // bl mainMBB
10443 // v_restore = 1
10444 // b sinkMBB
10445 //
10446 // mainMBB:
10447 // buf[LabelOffset] = LR
10448 // v_main = 0
10449 //
10450 // sinkMBB:
10451 // v = phi(main, restore)
10452 //
10453
10454 MachineBasicBlock *thisMBB = MBB;
10455 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
10456 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
10457 MF->insert(I, mainMBB);
10458 MF->insert(I, sinkMBB);
10459
10460 MachineInstrBuilder MIB;
10461
10462 // Transfer the remainder of BB and its successor edges to sinkMBB.
10463 sinkMBB->splice(sinkMBB->begin(), MBB,
10464 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
10465 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
10466
10467 // Note that the structure of the jmp_buf used here is not compatible
10468 // with that used by libc, and is not designed to be. Specifically, it
10469 // stores only those 'reserved' registers that LLVM does not otherwise
10470 // understand how to spill. Also, by convention, by the time this
10471 // intrinsic is called, Clang has already stored the frame address in the
10472 // first slot of the buffer and stack address in the third. Following the
10473 // X86 target code, we'll store the jump address in the second slot. We also
10474 // need to save the TOC pointer (R2) to handle jumps between shared
10475 // libraries, and that will be stored in the fourth slot. The thread
10476 // identifier (R13) is not affected.
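 // Editorial recap of the resulting buffer layout, in pointer-sized slots:
 // slot 0: frame address (already stored by Clang)
 // slot 1: jump address (LabelOffset, written in mainMBB below)
 // slot 2: stack address (already stored by Clang)
 // slot 3: TOC pointer, R2 (TOCOffset)
 // slot 4: base pointer (BPOffset)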
10477
10478 // thisMBB:
10479 const int64_t LabelOffset = 1 * PVT.getStoreSize();
10480 const int64_t TOCOffset = 3 * PVT.getStoreSize();
10481 const int64_t BPOffset = 4 * PVT.getStoreSize();
10482
10483 // Prepare the IP in a register.
10484 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
10485 unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
10486 unsigned BufReg = MI.getOperand(1).getReg();
10487
10488 if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
10489 setUsesTOCBasePtr(*MBB->getParent());
10490 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
10491 .addReg(PPC::X2)
10492 .addImm(TOCOffset)
10493 .addReg(BufReg)
10494 .cloneMemRefs(MI);
10495 }
10496
10497 // Naked functions never have a base pointer, and so we use r1. For all
10498 // other functions, this decision must be deferred until PEI runs.
10499 unsigned BaseReg;
10500 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
10501 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
10502 else
10503 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
10504
10505 MIB = BuildMI(*thisMBB, MI, DL,
10506 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
10507 .addReg(BaseReg)
10508 .addImm(BPOffset)
10509 .addReg(BufReg)
10510 .cloneMemRefs(MI);
10511
10512 // Setup
10513 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
10514 MIB.addRegMask(TRI->getNoPreservedMask());
10515
10516 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
10517
10518 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
10519 .addMBB(mainMBB);
10520 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
10521
10522 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
10523 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
10524
10525 // mainMBB:
10526 // mainDstReg = 0
10527 MIB =
10528 BuildMI(mainMBB, DL,
10529 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
10530
10531 // Store IP
10532 if (Subtarget.isPPC64()) {
10533 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
10534 .addReg(LabelReg)
10535 .addImm(LabelOffset)
10536 .addReg(BufReg);
10537 } else {
10538 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
10539 .addReg(LabelReg)
10540 .addImm(LabelOffset)
10541 .addReg(BufReg);
10542 }
10543 MIB.cloneMemRefs(MI);
10544
10545 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
10546 mainMBB->addSuccessor(sinkMBB);
10547
10548 // sinkMBB:
10549 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
10550 TII->get(PPC::PHI), DstReg)
10551 .addReg(mainDstReg).addMBB(mainMBB)
10552 .addReg(restoreDstReg).addMBB(thisMBB);
10553
10554 MI.eraseFromParent();
10555 return sinkMBB;
10556}
10557
10558 MachineBasicBlock *
10559 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
10560 MachineBasicBlock *MBB) const {
10561 DebugLoc DL = MI.getDebugLoc();
10562 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10563
10564 MachineFunction *MF = MBB->getParent();
10565 MachineRegisterInfo &MRI = MF->getRegInfo();
10566
10567 MVT PVT = getPointerTy(MF->getDataLayout());
10568 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
10569 "Invalid Pointer Size!");
10570
10571 const TargetRegisterClass *RC =
10572 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
10573 unsigned Tmp = MRI.createVirtualRegister(RC);
10574 // Since FP is only updated here but NOT referenced, it's treated as GPR.
10575 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
10576 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
10577 unsigned BP =
10578 (PVT == MVT::i64)
10579 ? PPC::X30
10580 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
10581 : PPC::R30);
10582
10583 MachineInstrBuilder MIB;
10584
10585 const int64_t LabelOffset = 1 * PVT.getStoreSize();
10586 const int64_t SPOffset = 2 * PVT.getStoreSize();
10587 const int64_t TOCOffset = 3 * PVT.getStoreSize();
10588 const int64_t BPOffset = 4 * PVT.getStoreSize();
10589
10590 unsigned BufReg = MI.getOperand(0).getReg();
10591
10592 // Reload FP (the jumped-to function may not have had a
10593 // frame pointer, and if so, then its r31 will be restored
10594 // as necessary).
10595 if (PVT == MVT::i64) {
10596 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
10597 .addImm(0)
10598 .addReg(BufReg);
10599 } else {
10600 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
10601 .addImm(0)
10602 .addReg(BufReg);
10603 }
10604 MIB.cloneMemRefs(MI);
10605
10606 // Reload IP
10607 if (PVT == MVT::i64) {
10608 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
10609 .addImm(LabelOffset)
10610 .addReg(BufReg);
10611 } else {
10612 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
10613 .addImm(LabelOffset)
10614 .addReg(BufReg);
10615 }
10616 MIB.cloneMemRefs(MI);
10617
10618 // Reload SP
10619 if (PVT == MVT::i64) {
10620 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
10621 .addImm(SPOffset)
10622 .addReg(BufReg);
10623 } else {
10624 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
10625 .addImm(SPOffset)
10626 .addReg(BufReg);
10627 }
10628 MIB.cloneMemRefs(MI);
10629
10630 // Reload BP
10631 if (PVT == MVT::i64) {
10632 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
10633 .addImm(BPOffset)
10634 .addReg(BufReg);
10635 } else {
10636 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
10637 .addImm(BPOffset)
10638 .addReg(BufReg);
10639 }
10640 MIB.cloneMemRefs(MI);
10641
10642 // Reload TOC
10643 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
10645 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
10646 .addImm(TOCOffset)
10647 .addReg(BufReg)
10648 .cloneMemRefs(MI);
10649 }
10650
10651 // Jump
10652 BuildMI(*MBB, MI, DL,
10653 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
10654 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
10655
10656 MI.eraseFromParent();
10657 return MBB;
10658}
10659
10660 MachineBasicBlock *
10661 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
10662 MachineBasicBlock *BB) const {
10663 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
10664 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
10665 if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
10666 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
10667 // Call lowering should have added an r2 operand to indicate a dependence
10668 // on the TOC base pointer value. It can't however, because there is no
10669 // way to mark the dependence as implicit there, and so the stackmap code
10670 // will confuse it with a regular operand. Instead, add the dependence
10671 // here.
10672 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
10673 }
10674
10675 return emitPatchPoint(MI, BB);
10676 }
10677
10678 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
10679 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
10680 return emitEHSjLjSetJmp(MI, BB);
10681 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
10682 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
10683 return emitEHSjLjLongJmp(MI, BB);
10684 }
10685
10686 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10687
10688 // To "insert" these instructions we actually have to insert their
10689 // control-flow patterns.
10690 const BasicBlock *LLVM_BB = BB->getBasicBlock();
10691 MachineFunction::iterator It = ++BB->getIterator();
10692
10693 MachineFunction *F = BB->getParent();
10694
10695 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
10696 MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
10697 MI.getOpcode() == PPC::SELECT_I8) {
10698 SmallVector<MachineOperand, 2> Cond;
10699 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
10700 MI.getOpcode() == PPC::SELECT_CC_I8)
10701 Cond.push_back(MI.getOperand(4));
10702 else
10703 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
10704 Cond.push_back(MI.getOperand(1));
10705
10706 DebugLoc dl = MI.getDebugLoc();
10707 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
10708 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
10709 } else if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
10710 MI.getOpcode() == PPC::SELECT_CC_I8 ||
10711 MI.getOpcode() == PPC::SELECT_CC_F4 ||
10712 MI.getOpcode() == PPC::SELECT_CC_F8 ||
10713 MI.getOpcode() == PPC::SELECT_CC_F16 ||
10714 MI.getOpcode() == PPC::SELECT_CC_QFRC ||
10715 MI.getOpcode() == PPC::SELECT_CC_QSRC ||
10716 MI.getOpcode() == PPC::SELECT_CC_QBRC ||
10717 MI.getOpcode() == PPC::SELECT_CC_VRRC ||
10718 MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
10719 MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
10720 MI.getOpcode() == PPC::SELECT_CC_VSRC ||
10721 MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
10722 MI.getOpcode() == PPC::SELECT_CC_SPE ||
10723 MI.getOpcode() == PPC::SELECT_I4 ||
10724 MI.getOpcode() == PPC::SELECT_I8 ||
10725 MI.getOpcode() == PPC::SELECT_F4 ||
10726 MI.getOpcode() == PPC::SELECT_F8 ||
10727 MI.getOpcode() == PPC::SELECT_F16 ||
10728 MI.getOpcode() == PPC::SELECT_QFRC ||
10729 MI.getOpcode() == PPC::SELECT_QSRC ||
10730 MI.getOpcode() == PPC::SELECT_QBRC ||
10731 MI.getOpcode() == PPC::SELECT_SPE ||
10732 MI.getOpcode() == PPC::SELECT_SPE4 ||
10733 MI.getOpcode() == PPC::SELECT_VRRC ||
10734 MI.getOpcode() == PPC::SELECT_VSFRC ||
10735 MI.getOpcode() == PPC::SELECT_VSSRC ||
10736 MI.getOpcode() == PPC::SELECT_VSRC) {
10737 // The incoming instruction knows the destination vreg to set, the
10738 // condition code register to branch on, the true/false values to
10739 // select between, and a branch opcode to use.
10740
10741 // thisMBB:
10742 // ...
10743 // TrueVal = ...
10744 // cmpTY ccX, r1, r2
10745 // bCC copy1MBB
10746 // fallthrough --> copy0MBB
10747 MachineBasicBlock *thisMBB = BB;
10748 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
10749 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
10750 DebugLoc dl = MI.getDebugLoc();
10751 F->insert(It, copy0MBB);
10752 F->insert(It, sinkMBB);
10753
10754 // Transfer the remainder of BB and its successor edges to sinkMBB.
10755 sinkMBB->splice(sinkMBB->begin(), BB,
10756 std::next(MachineBasicBlock::iterator(MI)), BB->end());
10757 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
10758
10759 // Next, add the true and fallthrough blocks as its successors.
10760 BB->addSuccessor(copy0MBB);
10761 BB->addSuccessor(sinkMBB);
10762
10763 if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
10764 MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
10765 MI.getOpcode() == PPC::SELECT_F16 ||
10766 MI.getOpcode() == PPC::SELECT_SPE4 ||
10767 MI.getOpcode() == PPC::SELECT_SPE ||
10768 MI.getOpcode() == PPC::SELECT_QFRC ||
10769 MI.getOpcode() == PPC::SELECT_QSRC ||
10770 MI.getOpcode() == PPC::SELECT_QBRC ||
10771 MI.getOpcode() == PPC::SELECT_VRRC ||
10772 MI.getOpcode() == PPC::SELECT_VSFRC ||
10773 MI.getOpcode() == PPC::SELECT_VSSRC ||
10774 MI.getOpcode() == PPC::SELECT_VSRC) {
10775 BuildMI(BB, dl, TII->get(PPC::BC))
10776 .addReg(MI.getOperand(1).getReg())
10777 .addMBB(sinkMBB);
10778 } else {
10779 unsigned SelectPred = MI.getOperand(4).getImm();
10780 BuildMI(BB, dl, TII->get(PPC::BCC))
10781 .addImm(SelectPred)
10782 .addReg(MI.getOperand(1).getReg())
10783 .addMBB(sinkMBB);
10784 }
10785
10786 // copy0MBB:
10787 // %FalseValue = ...
10788 // # fallthrough to sinkMBB
10789 BB = copy0MBB;
10790
10791 // Update machine-CFG edges
10792 BB->addSuccessor(sinkMBB);
10793
10794 // sinkMBB:
10795 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
10796 // ...
10797 BB = sinkMBB;
10798 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
10799 .addReg(MI.getOperand(3).getReg())
10800 .addMBB(copy0MBB)
10801 .addReg(MI.getOperand(2).getReg())
10802 .addMBB(thisMBB);
10803 } else if (MI.getOpcode() == PPC::ReadTB) {
10804 // To read the 64-bit time-base register on a 32-bit target, we read the
10805 // two halves. Should the counter have wrapped while it was being read, we
10806 // need to try again.
10807 // ...
10808 // readLoop:
10809 // mfspr Rx,TBU # load from TBU
10810 // mfspr Ry,TB # load from TB
10811 // mfspr Rz,TBU # load from TBU
10812 // cmpw crX,Rx,Rz # check if 'old'='new'
10813 // bne readLoop # branch if they're not equal
10814 // ...
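 // Editorial example of the race being handled: if TBU = 5 and TB is
 // about to carry out of 0xFFFFFFFF, the carry may land between the two
 // TBU reads; the second read then returns 6 != 5, the cmpw fails, and
 // the loop retries until both TBU samples agree, so the combined 64-bit
 // value is always consistent.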
10815
10816 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
10817 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
10818 DebugLoc dl = MI.getDebugLoc();
10819 F->insert(It, readMBB);
10820 F->insert(It, sinkMBB);
10821
10822 // Transfer the remainder of BB and its successor edges to sinkMBB.
10823 sinkMBB->splice(sinkMBB->begin(), BB,
10824 std::next(MachineBasicBlock::iterator(MI)), BB->end());
10825 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
10826
10827 BB->addSuccessor(readMBB);
10828 BB = readMBB;
10829
10830 MachineRegisterInfo &RegInfo = F->getRegInfo();
10831 unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
10832 unsigned LoReg = MI.getOperand(0).getReg();
10833 unsigned HiReg = MI.getOperand(1).getReg();
10834
10835 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
10836 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
10837 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
10838
10839 unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
10840
10841 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
10842 .addReg(HiReg)
10843 .addReg(ReadAgainReg);
10844 BuildMI(BB, dl, TII->get(PPC::BCC))
10845 .addImm(PPC::PRED_NE)
10846 .addReg(CmpReg)
10847 .addMBB(readMBB);
10848
10849 BB->addSuccessor(readMBB);
10850 BB->addSuccessor(sinkMBB);
10851 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
10852 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
10853 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
10854 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
10855 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
10856 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
10857 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
10858 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
10859
10860 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
10861 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
10862 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
10863 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
10864 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
10865 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
10866 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
10867 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
10868
10869 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
10870 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
10871 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
10872 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
10873 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
10874 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
10875 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
10876 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
10877
10878 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
10879 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
10880 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
10881 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
10882 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
10883 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
10884 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
10885 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
10886
10887 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
10888 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
10889 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
10890 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
10891 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
10892 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
10893 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
10894 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
10895
10896 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
10897 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
10898 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
10899 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
10900 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
10901 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
10902 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
10903 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
10904
10905 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
10906 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
10907 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
10908 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
10909 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
10910 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
10911 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
10912 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
10913
10914 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
10915 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
10916 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
10917 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
10918 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
10919 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
10920 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
10921 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
10922
10923 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
10924 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
10925 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
10926 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
10927 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
10928 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
10929 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
10930 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
10931
10932 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
10933 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
10934 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
10935 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
10936 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
10937 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
10938 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
10939 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
10940
10941 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
10942 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
10943 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
10944 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
10945 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
10946 BB = EmitAtomicBinary(MI, BB, 4, 0);
10947 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
10948 BB = EmitAtomicBinary(MI, BB, 8, 0);
10949 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
10950 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
10951 (Subtarget.hasPartwordAtomics() &&
10952 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
10953 (Subtarget.hasPartwordAtomics() &&
10954 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
10955 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
10956
10957 auto LoadMnemonic = PPC::LDARX;
10958 auto StoreMnemonic = PPC::STDCX;
10959 switch (MI.getOpcode()) {
10960 default:
10961 llvm_unreachable("Compare and swap of unknown size");
10962 case PPC::ATOMIC_CMP_SWAP_I8:
10963 LoadMnemonic = PPC::LBARX;
10964 StoreMnemonic = PPC::STBCX;
10965 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
10966 break;
10967 case PPC::ATOMIC_CMP_SWAP_I16:
10968 LoadMnemonic = PPC::LHARX;
10969 StoreMnemonic = PPC::STHCX;
10970 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
10971 break;
10972 case PPC::ATOMIC_CMP_SWAP_I32:
10973 LoadMnemonic = PPC::LWARX;
10974 StoreMnemonic = PPC::STWCX;
10975 break;
10976 case PPC::ATOMIC_CMP_SWAP_I64:
10977 LoadMnemonic = PPC::LDARX;
10978 StoreMnemonic = PPC::STDCX;
10979 break;
10980 }
10981 unsigned dest = MI.getOperand(0).getReg();
10982 unsigned ptrA = MI.getOperand(1).getReg();
10983 unsigned ptrB = MI.getOperand(2).getReg();
10984 unsigned oldval = MI.getOperand(3).getReg();
10985 unsigned newval = MI.getOperand(4).getReg();
10986 DebugLoc dl = MI.getDebugLoc();
10987
10988 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
10989 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
10990 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
10991 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10992 F->insert(It, loop1MBB);
10993 F->insert(It, loop2MBB);
10994 F->insert(It, midMBB);
10995 F->insert(It, exitMBB);
10996 exitMBB->splice(exitMBB->begin(), BB,
10997 std::next(MachineBasicBlock::iterator(MI)), BB->end());
10998 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10999
11000 // thisMBB:
11001 // ...
11002 // fallthrough --> loopMBB
11003 BB->addSuccessor(loop1MBB);
11004
11005 // loop1MBB:
11006 // l[bhwd]arx dest, ptr
11007 // cmp[wd] dest, oldval
11008 // bne- midMBB
11009 // loop2MBB:
11010 // st[bhwd]cx. newval, ptr
11011 // bne- loopMBB
11012 // b exitBB
11013 // midMBB:
11014 // st[bhwd]cx. dest, ptr
11015 // exitBB:
11016 BB = loop1MBB;
11017 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
11018 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
11019 .addReg(oldval)
11020 .addReg(dest);
11021 BuildMI(BB, dl, TII->get(PPC::BCC))
11022 .addImm(PPC::PRED_NE)
11023 .addReg(PPC::CR0)
11024 .addMBB(midMBB);
11025 BB->addSuccessor(loop2MBB);
11026 BB->addSuccessor(midMBB);
11027
11028 BB = loop2MBB;
11029 BuildMI(BB, dl, TII->get(StoreMnemonic))
11030 .addReg(newval)
11031 .addReg(ptrA)
11032 .addReg(ptrB);
11033 BuildMI(BB, dl, TII->get(PPC::BCC))
11034 .addImm(PPC::PRED_NE)
11035 .addReg(PPC::CR0)
11036 .addMBB(loop1MBB);
11037 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
11038 BB->addSuccessor(loop1MBB);
11039 BB->addSuccessor(exitMBB);
11040
11041 BB = midMBB;
11042 BuildMI(BB, dl, TII->get(StoreMnemonic))
11043 .addReg(dest)
11044 .addReg(ptrA)
11045 .addReg(ptrB);
11046 BB->addSuccessor(exitMBB);
11047
11048 // exitMBB:
11049 // ...
11050 BB = exitMBB;
11051 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
11052 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
11053 // We must use 64-bit registers for addresses when targeting 64-bit,
11054 // since we're actually doing arithmetic on them. Other registers
11055 // can be 32-bit.
11056 bool is64bit = Subtarget.isPPC64();
11057 bool isLittleEndian = Subtarget.isLittleEndian();
11058 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
11059
11060 unsigned dest = MI.getOperand(0).getReg();
11061 unsigned ptrA = MI.getOperand(1).getReg();
11062 unsigned ptrB = MI.getOperand(2).getReg();
11063 unsigned oldval = MI.getOperand(3).getReg();
11064 unsigned newval = MI.getOperand(4).getReg();
11065 DebugLoc dl = MI.getDebugLoc();
11066
11067 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
11068 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
11069 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
11070 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11071 F->insert(It, loop1MBB);
11072 F->insert(It, loop2MBB);
11073 F->insert(It, midMBB);
11074 F->insert(It, exitMBB);
11075 exitMBB->splice(exitMBB->begin(), BB,
11076 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11077 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11078
11079 MachineRegisterInfo &RegInfo = F->getRegInfo();
11080 const TargetRegisterClass *RC =
11081 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11082 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11083
11084 Register PtrReg = RegInfo.createVirtualRegister(RC);
11085 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
11086 Register ShiftReg =
11087 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
11088 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
11089 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
11090 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
11091 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
11092 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
11093 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
11094 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
11095 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
11096 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
11097 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
11098 Register Ptr1Reg;
11099 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
11100 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11101 // thisMBB:
11102 // ...
11103 // fallthrough --> loopMBB
11104 BB->addSuccessor(loop1MBB);
11105
11106 // The 4-byte load must be aligned, while a char or short may be
11107 // anywhere in the word. Hence all this nasty bookkeeping code.
11108 // add ptr1, ptrA, ptrB [copy if ptrA==0]
11109 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11110 // xori shift, shift1, 24 [16]
11111 // rlwinm ptr, ptr1, 0, 0, 29
11112 // slw newval2, newval, shift
11113 // slw oldval2, oldval,shift
11114 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11115 // slw mask, mask2, shift
11116 // and newval3, newval2, mask
11117 // and oldval3, oldval2, mask
11118 // loop1MBB:
11119 // lwarx tmpDest, ptr
11120 // and tmp, tmpDest, mask
11121 // cmpw tmp, oldval3
11122 // bne- midMBB
11123 // loop2MBB:
11124 // andc tmp2, tmpDest, mask
11125 // or tmp4, tmp2, newval3
11126 // stwcx. tmp4, ptr
11127 // bne- loop1MBB
11128 // b exitBB
11129 // midMBB:
11130 // stwcx. tmpDest, ptr
11131 // exitBB:
11132 // srw dest, tmpDest, shift
11133 if (ptrA != ZeroReg) {
11134 Ptr1Reg = RegInfo.createVirtualRegister(RC);
11135 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11136 .addReg(ptrA)
11137 .addReg(ptrB);
11138 } else {
11139 Ptr1Reg = ptrB;
11140 }
11141
11142 // We need to use a 32-bit subregister here to avoid a register class
11143 // mismatch in 64-bit mode.
11144 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
11145 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11146 .addImm(3)
11147 .addImm(27)
11148 .addImm(is8bit ? 28 : 27);
11149 if (!isLittleEndian)
11150 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
11151 .addReg(Shift1Reg)
11152 .addImm(is8bit ? 24 : 16);
11153 if (is64bit)
11154 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
11155 .addReg(Ptr1Reg)
11156 .addImm(0)
11157 .addImm(61);
11158 else
11159 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
11160 .addReg(Ptr1Reg)
11161 .addImm(0)
11162 .addImm(0)
11163 .addImm(29);
11164 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
11165 .addReg(newval)
11166 .addReg(ShiftReg);
11167 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
11168 .addReg(oldval)
11169 .addReg(ShiftReg);
11170 if (is8bit)
11171 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
11172 else {
11173 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
11174 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
11175 .addReg(Mask3Reg)
11176 .addImm(65535);
11177 }
11178 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
11179 .addReg(Mask2Reg)
11180 .addReg(ShiftReg);
11181 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
11182 .addReg(NewVal2Reg)
11183 .addReg(MaskReg);
11184 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
11185 .addReg(OldVal2Reg)
11186 .addReg(MaskReg);
11187
11188 BB = loop1MBB;
11189 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
11190 .addReg(ZeroReg)
11191 .addReg(PtrReg);
11192 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
11193 .addReg(TmpDestReg)
11194 .addReg(MaskReg);
11195 BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
11196 .addReg(TmpReg)
11197 .addReg(OldVal3Reg);
11198 BuildMI(BB, dl, TII->get(PPC::BCC))
11199 .addImm(PPC::PRED_NE)
11200 .addReg(PPC::CR0)
11201 .addMBB(midMBB);
11202 BB->addSuccessor(loop2MBB);
11203 BB->addSuccessor(midMBB);
11204
11205 BB = loop2MBB;
11206 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
11207 .addReg(TmpDestReg)
11208 .addReg(MaskReg);
11209 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
11210 .addReg(Tmp2Reg)
11211 .addReg(NewVal3Reg);
11212 BuildMI(BB, dl, TII->get(PPC::STWCX))
11213 .addReg(Tmp4Reg)
11214 .addReg(ZeroReg)
11215 .addReg(PtrReg);
11216 BuildMI(BB, dl, TII->get(PPC::BCC))
11217 .addImm(PPC::PRED_NE)
11218 .addReg(PPC::CR0)
11219 .addMBB(loop1MBB);
11220 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
11221 BB->addSuccessor(loop1MBB);
11222 BB->addSuccessor(exitMBB);
11223
11224 BB = midMBB;
11225 BuildMI(BB, dl, TII->get(PPC::STWCX))
11226 .addReg(TmpDestReg)
11227 .addReg(ZeroReg)
11228 .addReg(PtrReg);
11229 BB->addSuccessor(exitMBB);
11230
11231 // exitMBB:
11232 // ...
11233 BB = exitMBB;
11234 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
11235 .addReg(TmpReg)
11236 .addReg(ShiftReg);
11237 } else if (MI.getOpcode() == PPC::FADDrtz) {
11238 // This pseudo performs an FADD with rounding mode temporarily forced
11239 // to round-to-zero. We emit this via custom inserter since the FPSCR
11240 // is not modeled at the SelectionDAG level.
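 // Editorial note: FPSCR bits 30:31 form the RN (rounding control) field.
 // The mtfsb1 31 / mtfsb0 30 pair below sets RN = 0b01, i.e. round toward
 // zero, and the value saved by mffs restores the caller's mode afterward.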
11241 unsigned Dest = MI.getOperand(0).getReg();
11242 unsigned Src1 = MI.getOperand(1).getReg();
11243 unsigned Src2 = MI.getOperand(2).getReg();
11244 DebugLoc dl = MI.getDebugLoc();
11245
11246 MachineRegisterInfo &RegInfo = F->getRegInfo();
11247 unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
11248
11249 // Save FPSCR value.
11250 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
11251
11252 // Set rounding mode to round-to-zero.
11253 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
11254 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
11255
11256 // Perform addition.
11257 BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
11258
11259 // Restore FPSCR value.
11260 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
11261 } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
11262 MI.getOpcode() == PPC::ANDIo_1_GT_BIT ||
11263 MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
11264 MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) {
11265 unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
11266 MI.getOpcode() == PPC::ANDIo_1_GT_BIT8)
11267 ? PPC::ANDIo8
11268 : PPC::ANDIo;
11269 bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
11270 MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
11271
11272 MachineRegisterInfo &RegInfo = F->getRegInfo();
11273 unsigned Dest = RegInfo.createVirtualRegister(
11274 Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
11275
11276 DebugLoc dl = MI.getDebugLoc();
11277 BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
11278 .addReg(MI.getOperand(1).getReg())
11279 .addImm(1);
11280 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
11281 MI.getOperand(0).getReg())
11282 .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
11283 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
11284 DebugLoc Dl = MI.getDebugLoc();
11285 MachineRegisterInfo &RegInfo = F->getRegInfo();
11286 unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
11287 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
11288 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
11289 MI.getOperand(0).getReg())
11290 .addReg(CRReg);
11291 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
11292 DebugLoc Dl = MI.getDebugLoc();
11293 unsigned Imm = MI.getOperand(1).getImm();
11294 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
11295 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
11296 MI.getOperand(0).getReg())
11297 .addReg(PPC::CR0EQ);
11298 } else if (MI.getOpcode() == PPC::SETRNDi) {
11299 DebugLoc dl = MI.getDebugLoc();
11300 unsigned OldFPSCRReg = MI.getOperand(0).getReg();
11301
11302 // Save FPSCR value.
11303 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
11304
11305 // The floating-point rounding mode is in bits 62:63 of the FPSCR, with
11306 // the following settings:
11307 // 00 Round to nearest
11308 // 01 Round to 0
11309 // 10 Round to +inf
11310 // 11 Round to -inf
11311
11312 // When the operand is an immediate, use its two least significant bits
11313 // to set bits 62:63 of the FPSCR.
11314 unsigned Mode = MI.getOperand(1).getImm();
11315 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
11316 .addImm(31);
11317
11318 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
11319 .addImm(30);
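 // For example, SETRNDi 2 emits mtfsb0 31 and mtfsb1 30, leaving RN
 // (bits 62:63) = 0b10, i.e. round to +inf per the table above.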
11320 } else if (MI.getOpcode() == PPC::SETRND) {
11321 DebugLoc dl = MI.getDebugLoc();
11322
11323 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
11324 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
11325 // If the target doesn't have DirectMove, we should use the stack to do the
11326 // conversion, because the target doesn't have instructions like mtvsrd or
11327 // mfvsrd to do this conversion directly.
11328 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
11329 if (Subtarget.hasDirectMove()) {
11330 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
11331 .addReg(SrcReg);
11332 } else {
11333 // Use the stack to do the register copy.
11334 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
11335 MachineRegisterInfo &RegInfo = F->getRegInfo();
11336 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
11337 if (RC == &PPC::F8RCRegClass) {
11338 // Copy register from F8RCRegClass to G8RCRegClass.
11339 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
11340 "Unsupported RegClass.");
11341
11342 StoreOp = PPC::STFD;
11343 LoadOp = PPC::LD;
11344 } else {
11345 // Copy register from G8RCRegClass to F8RCRegClass.
11346 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
11347 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
11348 "Unsupported RegClass.");
11349 }
11350
11351 MachineFrameInfo &MFI = F->getFrameInfo();
11352 int FrameIdx = MFI.CreateStackObject(8, 8, false);
11353
11354 MachineMemOperand *MMOStore = F->getMachineMemOperand(
11355 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
11356 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
11357 MFI.getObjectAlignment(FrameIdx));
11358
11359 // Store the SrcReg into the stack.
11360 BuildMI(*BB, MI, dl, TII->get(StoreOp))
11361 .addReg(SrcReg)
11362 .addImm(0)
11363 .addFrameIndex(FrameIdx)
11364 .addMemOperand(MMOStore);
11365
11366 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
11367 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
11368 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
11369 MFI.getObjectAlignment(FrameIdx));
11370
11371 // Load from the stack slot where SrcReg is stored into DestReg, which
11372 // completes the RegClass conversion from RegClass::SrcReg to
11373 // RegClass::DestReg.
11374 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
11375 .addImm(0)
11376 .addFrameIndex(FrameIdx)
11377 .addMemOperand(MMOLoad);
11378 }
11379 };
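 // As an illustration, when direct moves are unavailable the lambda above
 // copies an FPR to a GPR roughly as: stfd SrcReg, slot; ld DestReg, slot
 // (and std/lfd for the opposite direction), using a fresh 8-byte stack slot.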
11380
11381 unsigned OldFPSCRReg = MI.getOperand(0).getReg();
11382
11383 // Save FPSCR value.
11384 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
11385
11386 // When the operand is a GPRC register, use its two least significant bits
11387 // and the mtfsf instruction to set bits 62:63 of the FPSCR.
11388 //
11389 // copy OldFPSCRTmpReg, OldFPSCRReg
11390 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
11391 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
11392 // copy NewFPSCRReg, NewFPSCRTmpReg
11393 // mtfsf 255, NewFPSCRReg
11394 MachineOperand SrcOp = MI.getOperand(1);
11395 MachineRegisterInfo &RegInfo = F->getRegInfo();
11396 unsigned OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
11397
11398 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
11399
11400 unsigned ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
11401 unsigned ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
11402
11403 // The first operand of INSERT_SUBREG should be a register that has
11404 // subregisters; since we only care about its RegClass, we can use an
11405 // IMPLICIT_DEF register.
11406 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
11407 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
11408 .addReg(ImDefReg)
11409 .add(SrcOp)
11410 .addImm(1);
11411
11412 unsigned NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
11413 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
11414 .addReg(OldFPSCRTmpReg)
11415 .addReg(ExtSrcReg)
11416 .addImm(0)
11417 .addImm(62);
11418
11419 unsigned NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
11420 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
11421
11422 // The mask 255 means that bits 32:63 of NewFPSCRReg are placed into bits
11423 // 32:63 of the FPSCR.
11424 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
11425 .addImm(255)
11426 .addReg(NewFPSCRReg)
11427 .addImm(0)
11428 .addImm(0);
11429 } else {
11430 llvm_unreachable("Unexpected instr type to insert");
11431 }
11432
11433 MI.eraseFromParent(); // The pseudo instruction is gone now.
11434 return BB;
11435}
11436
11437//===----------------------------------------------------------------------===//
11438// Target Optimization Hooks
11439//===----------------------------------------------------------------------===//
11440
11441static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
11442 // For the estimates, convergence is quadratic, so we essentially double the
11443 // number of digits correct after every iteration. For both FRE and FRSQRTE,
11444 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
11445 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
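 // For example, with hasRecipPrec() f32 needs 1 step (14 -> 28 >= 23 bits)
 // and f64 needs 2 (14 -> 28 -> 56 >= 52); without it the counts become
 // 3 and 4 respectively, matching the values chosen below.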
11446 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
11447 if (VT.getScalarType() == MVT::f64)
11448 RefinementSteps++;
11449 return RefinementSteps;
11450}
11451
11452SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
11453 int Enabled, int &RefinementSteps,
11454 bool &UseOneConstNR,
11455 bool Reciprocal) const {
11456 EVT VT = Operand.getValueType();
11457 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
11458 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
11459 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
11460 (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
11461 (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
11462 (VT == MVT::v4f64 && Subtarget.hasQPX())) {
11463 if (RefinementSteps == ReciprocalEstimate::Unspecified)
11464 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
11465
11466 // The Newton-Raphson computation with a single constant does not provide
11467 // enough accuracy on some CPUs.
11468 UseOneConstNR = !Subtarget.needsTwoConstNR();
11469 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
11470 }
11471 return SDValue();
11472}
11473
11474SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
11475 int Enabled,
11476 int &RefinementSteps) const {
11477 EVT VT = Operand.getValueType();
11478 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
11479 (VT == MVT::f64 && Subtarget.hasFRE()) ||
11480 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
11481 (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
11482 (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
11483 (VT == MVT::v4f64 && Subtarget.hasQPX())) {
11484 if (RefinementSteps == ReciprocalEstimate::Unspecified)
11485 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
11486 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
11487 }
11488 return SDValue();
11489}
11490
11491unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
11492 // Note: This functionality is used only when unsafe-fp-math is enabled, and
11493 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
11494 // enabled for division), this functionality is redundant with the default
11495 // combiner logic (once the division -> reciprocal/multiply transformation
11496 // has taken place). As a result, this matters more for older cores than for
11497 // newer ones.
11498
11499 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
11500 // reciprocal if there are two or more FDIVs (for embedded cores with only
11501 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
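 // For example, on a generic OOO core (threshold 3), a/d + b/d + c/d can
 // become t = 1.0/d; a*t + b*t + c*t, trading three divides for one
 // reciprocal and three multiplies.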
11502 switch (Subtarget.getDarwinDirective()) {
11503 default:
11504 return 3;
11505 case PPC::DIR_440:
11506 case PPC::DIR_A2:
11507 case PPC::DIR_E500:
11508 case PPC::DIR_E500mc:
11509 case PPC::DIR_E5500:
11510 return 2;
11511 }
11512}
11513
11514// isConsecutiveLSLoc needs to work even if all adds have not yet been
11515// collapsed, and so we need to look through chains of them.
11516static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
11517 int64_t& Offset, SelectionDAG &DAG) {
11518 if (DAG.isBaseWithConstantOffset(Loc)) {
11519 Base = Loc.getOperand(0);
11520 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
11521
11522 // The base might itself be a base plus an offset, and if so, accumulate
11523 // that as well.
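 // For example, Loc = (add (add X, 8), 4) recurses to produce Base = X
 // with Offset accumulated to 12.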
11524 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
11525 }
11526}
11527
11528static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
11529 unsigned Bytes, int Dist,
11530 SelectionDAG &DAG) {
11531 if (VT.getSizeInBits() / 8 != Bytes)
11532 return false;
11533
11534 SDValue BaseLoc = Base->getBasePtr();
11535 if (Loc.getOpcode() == ISD::FrameIndex) {
11536 if (BaseLoc.getOpcode() != ISD::FrameIndex)
11537 return false;
11538 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
11539 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
11540 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
11541 int FS = MFI.getObjectSize(FI);
11542 int BFS = MFI.getObjectSize(BFI);
11543 if (FS != BFS || FS != (int)Bytes) return false;
11544 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
11545 }
11546
11547 SDValue Base1 = Loc, Base2 = BaseLoc;
11548 int64_t Offset1 = 0, Offset2 = 0;
11549 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
11550 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
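 // Loc and BaseLoc are consecutive at distance Dist when they share a base
 // and their offsets differ by exactly Dist * Bytes, e.g. for Dist = 1 and
 // Bytes = 16, Offset1 == Offset2 + 16.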
11551 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
11552 return true;
11553
11554 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11555 const GlobalValue *GV1 = nullptr;
11556 const GlobalValue *GV2 = nullptr;
11557 Offset1 = 0;
11558 Offset2 = 0;
11559 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
11560 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
11561 if (isGA1 && isGA2 && GV1 == GV2)
11562 return Offset1 == (Offset2 + Dist*Bytes);
11563 return false;
11564}
11565
11566// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
11567// not enforce equality of the chain operands.
11568static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
11569 unsigned Bytes, int Dist,
11570 SelectionDAG &DAG) {
11571 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
11572 EVT VT = LS->getMemoryVT();
11573 SDValue Loc = LS->getBasePtr();
11574 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
11575 }
11576
11577 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
11578 EVT VT;
11579 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
11580 default: return false;
11581 case Intrinsic::ppc_qpx_qvlfd:
11582 case Intrinsic::ppc_qpx_qvlfda:
11583 VT = MVT::v4f64;
11584 break;
11585 case Intrinsic::ppc_qpx_qvlfs:
11586 case Intrinsic::ppc_qpx_qvlfsa:
11587 VT = MVT::v4f32;
11588 break;
11589 case Intrinsic::ppc_qpx_qvlfcd:
11590 case Intrinsic::ppc_qpx_qvlfcda:
11591 VT = MVT::v2f64;
11592 break;
11593 case Intrinsic::ppc_qpx_qvlfcs:
11594 case Intrinsic::ppc_qpx_qvlfcsa:
11595 VT = MVT::v2f32;
11596 break;
11597 case Intrinsic::ppc_qpx_qvlfiwa:
11598 case Intrinsic::ppc_qpx_qvlfiwz:
11599 case Intrinsic::ppc_altivec_lvx:
11600 case Intrinsic::ppc_altivec_lvxl:
11601 case Intrinsic::ppc_vsx_lxvw4x:
11602 case Intrinsic::ppc_vsx_lxvw4x_be:
11603 VT = MVT::v4i32;
11604 break;
11605 case Intrinsic::ppc_vsx_lxvd2x:
11606 case Intrinsic::ppc_vsx_lxvd2x_be:
11607 VT = MVT::v2f64;
11608 break;
11609 case Intrinsic::ppc_altivec_lvebx:
11610 VT = MVT::i8;
11611 break;
11612 case Intrinsic::ppc_altivec_lvehx:
11613 VT = MVT::i16;
11614 break;
11615 case Intrinsic::ppc_altivec_lvewx:
11616 VT = MVT::i32;
11617 break;
11618 }
11619
11620 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
11621 }
11622
11623 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
11624 EVT VT;
11625 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
11626 default: return false;
11627 case Intrinsic::ppc_qpx_qvstfd:
11628 case Intrinsic::ppc_qpx_qvstfda:
11629 VT = MVT::v4f64;
11630 break;
11631 case Intrinsic::ppc_qpx_qvstfs:
11632 case Intrinsic::ppc_qpx_qvstfsa:
11633 VT = MVT::v4f32;
11634 break;
11635 case Intrinsic::ppc_qpx_qvstfcd:
11636 case Intrinsic::ppc_qpx_qvstfcda:
11637 VT = MVT::v2f64;
11638 break;
11639 case Intrinsic::ppc_qpx_qvstfcs:
11640 case Intrinsic::ppc_qpx_qvstfcsa:
11641 VT = MVT::v2f32;
11642 break;
11643 case Intrinsic::ppc_qpx_qvstfiw:
11644 case Intrinsic::ppc_qpx_qvstfiwa:
11645 case Intrinsic::ppc_altivec_stvx:
11646 case Intrinsic::ppc_altivec_stvxl:
11647 case Intrinsic::ppc_vsx_stxvw4x:
11648 VT = MVT::v4i32;
11649 break;
11650 case Intrinsic::ppc_vsx_stxvd2x:
11651 VT = MVT::v2f64;
11652 break;
11653 case Intrinsic::ppc_vsx_stxvw4x_be:
11654 VT = MVT::v4i32;
11655 break;
11656 case Intrinsic::ppc_vsx_stxvd2x_be:
11657 VT = MVT::v2f64;
11658 break;
11659 case Intrinsic::ppc_altivec_stvebx:
11660 VT = MVT::i8;
11661 break;
11662 case Intrinsic::ppc_altivec_stvehx:
11663 VT = MVT::i16;
11664 break;
11665 case Intrinsic::ppc_altivec_stvewx:
11666 VT = MVT::i32;
11667 break;
11668 }
11669
11670 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
11671 }
11672
11673 return false;
11674}
11675
11676 // Return true if there is a nearby consecutive load to the one provided
11677 // (regardless of alignment). We search up and down the chain, looking through
11678// token factors and other loads (but nothing else). As a result, a true result
11679// indicates that it is safe to create a new consecutive load adjacent to the
11680// load provided.
11681static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
11682 SDValue Chain = LD->getChain();
11683 EVT VT = LD->getMemoryVT();
11684
11685 SmallSet<SDNode *, 16> LoadRoots;
11686 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
11687 SmallSet<SDNode *, 16> Visited;
11688
11689 // First, search up the chain, branching to follow all token-factor operands.
11690 // If we find a consecutive load, then we're done, otherwise, record all
11691 // nodes just above the top-level loads and token factors.
11692 while (!Queue.empty()) {
11693 SDNode *ChainNext = Queue.pop_back_val();
11694 if (!Visited.insert(ChainNext).second)
11695 continue;
11696
11697 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
11698 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
11699 return true;
11700
11701 if (!Visited.count(ChainLD->getChain().getNode()))
11702 Queue.push_back(ChainLD->getChain().getNode());
11703 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
11704 for (const SDUse &O : ChainNext->ops())
11705 if (!Visited.count(O.getNode()))
11706 Queue.push_back(O.getNode());
11707 } else
11708 LoadRoots.insert(ChainNext);
11709 }
11710
11711 // Second, search down the chain, starting from the top-level nodes recorded
11712 // in the first phase. These top-level nodes are the nodes just above all
11713 // loads and token factors. Starting with their uses, recursively look through
11714 // all loads (just the chain uses) and token factors to find a consecutive
11715 // load.
11716 Visited.clear();
11717 Queue.clear();
11718
11719 for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
11720 IE = LoadRoots.end(); I != IE; ++I) {
11721 Queue.push_back(*I);
11722
11723 while (!Queue.empty()) {
11724 SDNode *LoadRoot = Queue.pop_back_val();
11725 if (!Visited.insert(LoadRoot).second)
11726 continue;
11727
11728 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
11729 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
11730 return true;
11731
11732 for (SDNode::use_iterator UI = LoadRoot->use_begin(),
11733 UE = LoadRoot->use_end(); UI != UE; ++UI)
11734 if (((isa<MemSDNode>(*UI) &&
11735 cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
11736 UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
11737 Queue.push_back(*UI);
11738 }
11739 }
11740
11741 return false;
11742}
11743
11744/// This function is called when we have proved that a SETCC node can be replaced
11745/// by subtraction (and other supporting instructions) so that the result of
11746/// the comparison is kept in a GPR instead of a CR field. This function is
11747/// purely for codegen purposes and has some flags to guide the codegen process.
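/// For example, an i32 unsigned compare x <u y is rewritten by zero extending
/// both operands to i64 and subtracting: the sign bit of (x - y) is set
/// exactly when x <u y, so a logical shift right by 63 yields the i1 result.
/// Swapping the operands handles SETUGT, and complementing the low bit
/// handles SETUGE (or SETULE when combined with the swap).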
11748static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
11749 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
11750 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
11751
11752 // Zero extend the operands to the largest legal integer. The original
11753 // operands must be of a strictly smaller size.
11754 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
11755 DAG.getConstant(Size, DL, MVT::i32));
11756 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
11757 DAG.getConstant(Size, DL, MVT::i32));
11758
11759 // Swap if needed. Depends on the condition code.
11760 if (Swap)
11761 std::swap(Op0, Op1);
11762
11763 // Subtract extended integers.
11764 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
11765
11766 // Move the sign bit to the least significant position and zero out the rest.
11767 // Now the least significant bit carries the result of original comparison.
11768 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
11769 DAG.getConstant(Size - 1, DL, MVT::i32));
11770 auto Final = Shifted;
11771
11772 // Complement the result if needed. Based on the condition code.
11773 if (Complement)
11774 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
11775 DAG.getConstant(1, DL, MVT::i64));
11776
11777 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
11778}
11779
11780SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
11781 DAGCombinerInfo &DCI) const {
11782 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
11783
11784 SelectionDAG &DAG = DCI.DAG;
11785 SDLoc DL(N);
11786
11787 // The size of the integers being compared plays a critical role in the
11788 // following analysis, so we prefer to do this when all types are legal.
11789 if (!DCI.isAfterLegalizeDAG())
11790 return SDValue();
11791
11792 // If all users of the SETCC extend its value to a legal integer type,
11793 // then we replace the SETCC with a subtraction.
11794 for (SDNode::use_iterator UI = N->use_begin(),
11795 UE = N->use_end(); UI != UE; ++UI) {
11796 if (UI->getOpcode() != ISD::ZERO_EXTEND)
11797 return SDValue();
11798 }
11799
11800 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
11801 auto OpSize = N->getOperand(0).getValueSizeInBits();
11802
11803 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
11804
11805 if (OpSize < Size) {
11806 switch (CC) {
11807 default: break;
11808 case ISD::SETULT:
11809 return generateEquivalentSub(N, Size, false, false, DL, DAG);
11810 case ISD::SETULE:
11811 return generateEquivalentSub(N, Size, true, true, DL, DAG);
11812 case ISD::SETUGT:
11813 return generateEquivalentSub(N, Size, false, true, DL, DAG);
11814 case ISD::SETUGE:
11815 return generateEquivalentSub(N, Size, true, false, DL, DAG);
11816 }
11817 }
11818
11819 return SDValue();
11820}
11821
11822SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
11823 DAGCombinerInfo &DCI) const {
11824 SelectionDAG &DAG = DCI.DAG;
11825 SDLoc dl(N);
11826
11827 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
11828 // If we're tracking CR bits, we need to be careful that we don't have:
11829 // trunc(binary-ops(zext(x), zext(y)))
11830 // or
11831 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...))
11832 // such that we're unnecessarily moving things into GPRs when it would be
11833 // better to keep them in CR bits.
11834
11835 // Note that trunc here can be an actual i1 trunc, or can be the effective
11836 // truncation that comes from a setcc or select_cc.
11837 if (N->getOpcode() == ISD::TRUNCATE &&
11838 N->getValueType(0) != MVT::i1)
11839 return SDValue();
11840
11841 if (N->getOperand(0).getValueType() != MVT::i32 &&
11842 N->getOperand(0).getValueType() != MVT::i64)
11843 return SDValue();
11844
11845 if (N->getOpcode() == ISD::SETCC ||
11846 N->getOpcode() == ISD::SELECT_CC) {
11847 // If we're looking at a comparison, then we need to make sure that the
11848 // high bits (all except for the first) don't affect the result.
11849 ISD::CondCode CC =
11850 cast<CondCodeSDNode>(N->getOperand(
11851 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
11852 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
11853
11854 if (ISD::isSignedIntSetCC(CC)) {
11855 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
11856 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
11857 return SDValue();
11858 } else if (ISD::isUnsignedIntSetCC(CC)) {
11859 if (!DAG.MaskedValueIsZero(N->getOperand(0),
11860 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
11861 !DAG.MaskedValueIsZero(N->getOperand(1),
11862 APInt::getHighBitsSet(OpBits, OpBits-1)))
11863 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
11864 : SDValue());
11865 } else {
11866 // This is neither a signed nor an unsigned comparison; just make sure
11867 // that the high bits are equal.
11868 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
11869 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
11870
11871 // We don't really care about what is known about the first bit (if
11872 // anything), so clear it in all masks prior to comparing them.
11873 Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
11874 Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
11875
11876 if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
11877 return SDValue();
11878 }
11879 }
11880
11881 // We now know that the higher-order bits are irrelevant, we just need to
11882 // make sure that all of the intermediate operations are bit operations, and
11883 // all inputs are extensions.
11884 if (N->getOperand(0).getOpcode() != ISD::AND &&
11885 N->getOperand(0).getOpcode() != ISD::OR &&
11886 N->getOperand(0).getOpcode() != ISD::XOR &&
11887 N->getOperand(0).getOpcode() != ISD::SELECT &&
11888 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
11889 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
11890 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
11891 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
11892 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
11893 return SDValue();
11894
11895 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
11896 N->getOperand(1).getOpcode() != ISD::AND &&
11897 N->getOperand(1).getOpcode() != ISD::OR &&
11898 N->getOperand(1).getOpcode() != ISD::XOR &&
11899 N->getOperand(1).getOpcode() != ISD::SELECT &&
11900 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
11901 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
11902 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
11903 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
11904 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
11905 return SDValue();
11906
11907 SmallVector<SDValue, 4> Inputs;
11908 SmallVector<SDValue, 8> BinOps, PromOps;
11909 SmallPtrSet<SDNode *, 16> Visited;
11910
11911 for (unsigned i = 0; i < 2; ++i) {
11912 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
11913 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
11914 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
11915 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
11916 isa<ConstantSDNode>(N->getOperand(i)))
11917 Inputs.push_back(N->getOperand(i));
11918 else
11919 BinOps.push_back(N->getOperand(i));
11920
11921 if (N->getOpcode() == ISD::TRUNCATE)
11922 break;
11923 }
11924
11925 // Visit all inputs, collect all binary operations (and, or, xor and
11926 // select) that are all fed by extensions.
11927 while (!BinOps.empty()) {
11928 SDValue BinOp = BinOps.back();
11929 BinOps.pop_back();
11930
11931 if (!Visited.insert(BinOp.getNode()).second)
11932 continue;
11933
11934 PromOps.push_back(BinOp);
11935
11936 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
11937 // The condition of the select is not promoted.
11938 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
11939 continue;
11940 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
11941 continue;
11942
11943 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
11944 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
11945 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
11946 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
11947 isa<ConstantSDNode>(BinOp.getOperand(i))) {
11948 Inputs.push_back(BinOp.getOperand(i));
11949 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
11950 BinOp.getOperand(i).getOpcode() == ISD::OR ||
11951 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
11952 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
11953 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
11954 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
11955 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
11956 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
11957 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
11958 BinOps.push_back(BinOp.getOperand(i));
11959 } else {
11960 // We have an input that is not an extension or another binary
11961 // operation; we'll abort this transformation.
11962 return SDValue();
11963 }
11964 }
11965 }
11966
11967 // Make sure that this is a self-contained cluster of operations (which
11968 // is not quite the same thing as saying that everything has only one
11969 // use).
11970 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
11971 if (isa<ConstantSDNode>(Inputs[i]))
11972 continue;
11973
11974 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
11975 UE = Inputs[i].getNode()->use_end();
11976 UI != UE; ++UI) {
11977 SDNode *User = *UI;
11978 if (User != N && !Visited.count(User))
11979 return SDValue();
11980
11981 // Make sure that we're not going to promote the non-output-value
11982 // operand(s) or SELECT or SELECT_CC.
11983 // FIXME: Although we could sometimes handle this, and it does occur in
11984 // practice that one of the condition inputs to the select is also one of
11985 // the outputs, we currently can't deal with this.
11986 if (User->getOpcode() == ISD::SELECT) {
11987 if (User->getOperand(0) == Inputs[i])
11988 return SDValue();
11989 } else if (User->getOpcode() == ISD::SELECT_CC) {
11990 if (User->getOperand(0) == Inputs[i] ||
11991 User->getOperand(1) == Inputs[i])
11992 return SDValue();
11993 }
11994 }
11995 }
11996
11997 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
11998 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
11999 UE = PromOps[i].getNode()->use_end();
12000 UI != UE; ++UI) {
12001 SDNode *User = *UI;
12002 if (User != N && !Visited.count(User))
12003 return SDValue();
12004
12005 // Make sure that we're not going to promote the non-output-value
12006 // operand(s) or SELECT or SELECT_CC.
12007 // FIXME: Although we could sometimes handle this, and it does occur in
12008 // practice that one of the condition inputs to the select is also one of
12009 // the outputs, we currently can't deal with this.
12010 if (User->getOpcode() == ISD::SELECT) {
12011 if (User->getOperand(0) == PromOps[i])
12012 return SDValue();
12013 } else if (User->getOpcode() == ISD::SELECT_CC) {
12014 if (User->getOperand(0) == PromOps[i] ||
12015 User->getOperand(1) == PromOps[i])
12016 return SDValue();
12017 }
12018 }
12019 }
12020
12021 // Replace all inputs with the extension operand.
12022 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12023 // Constants may have users outside the cluster of to-be-promoted nodes,
12024 // and so we need to replace those as we do the promotions.
12025 if (isa<ConstantSDNode>(Inputs[i]))
12026 continue;
12027 else
12028 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
12029 }
12030
12031 std::list<HandleSDNode> PromOpHandles;
12032 for (auto &PromOp : PromOps)
12033 PromOpHandles.emplace_back(PromOp);
12034
12035 // Replace all operations (these are all the same, but have a different
12036 // (i1) return type). DAG.getNode will validate that the types of
12037 // a binary operator match, so go through the list in reverse so that
12038 // we've likely promoted both operands first. Any intermediate truncations or
12039 // extensions disappear.
12040 while (!PromOpHandles.empty()) {
12041 SDValue PromOp = PromOpHandles.back().getValue();
12042 PromOpHandles.pop_back();
12043
12044 if (PromOp.getOpcode() == ISD::TRUNCATE ||
12045 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
12046 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
12047 PromOp.getOpcode() == ISD::ANY_EXTEND) {
12048 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
12049 PromOp.getOperand(0).getValueType() != MVT::i1) {
12050 // The operand is not yet ready (see comment below).
12051 PromOpHandles.emplace_front(PromOp);
12052 continue;
12053 }
12054
12055 SDValue RepValue = PromOp.getOperand(0);
12056 if (isa<ConstantSDNode>(RepValue))
12057 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
12058
12059 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
12060 continue;
12061 }
12062
12063 unsigned C;
12064 switch (PromOp.getOpcode()) {
12065 default: C = 0; break;
12066 case ISD::SELECT: C = 1; break;
12067 case ISD::SELECT_CC: C = 2; break;
12068 }
12069
12070 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
12071 PromOp.getOperand(C).getValueType() != MVT::i1) ||
12072 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
12073 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
12074 // The to-be-promoted operands of this node have not yet been
12075 // promoted (this should be rare because we're going through the
12076 // list backward, but if one of the operands has several users in
12077 // this cluster of to-be-promoted nodes, it is possible).
12078 PromOpHandles.emplace_front(PromOp);
12079 continue;
12080 }
12081
12082 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
12083 PromOp.getNode()->op_end());
12084
12085 // If there are any constant inputs, make sure they're replaced now.
12086 for (unsigned i = 0; i < 2; ++i)
12087 if (isa<ConstantSDNode>(Ops[C+i]))
12088 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
12089
12090 DAG.ReplaceAllUsesOfValueWith(PromOp,
12091 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
12092 }
12093
12094 // Now we're left with the initial truncation itself.
12095 if (N->getOpcode() == ISD::TRUNCATE)
12096 return N->getOperand(0);
12097
12098 // Otherwise, this is a comparison. The operands to be compared have just
12099 // changed type (to i1), but everything else is the same.
12100 return SDValue(N, 0);
12101}
12102
12103SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
12104 DAGCombinerInfo &DCI) const {
12105 SelectionDAG &DAG = DCI.DAG;
12106 SDLoc dl(N);
12107
12108 // If we're tracking CR bits, we need to be careful that we don't have:
12109 // zext(binary-ops(trunc(x), trunc(y)))
12110 // or
12111 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...))
12112 // such that we're unnecessarily moving things into CR bits that can more
12113 // efficiently stay in GPRs. Note that if we're not certain that the high
12114 // bits are set as required by the final extension, we may still need to do
12115 // some masking to get the proper behavior.
12116
12117 // This same functionality is important on PPC64 when dealing with
12118 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
12119 // the return values of functions. Because it is so similar, it is handled
12120 // here as well.
12121
12122 if (N->getValueType(0) != MVT::i32 &&
12123 N->getValueType(0) != MVT::i64)
12124 return SDValue();
12125
12126 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
12127 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
12128 return SDValue();
12129
12130 if (N->getOperand(0).getOpcode() != ISD::AND &&
12131 N->getOperand(0).getOpcode() != ISD::OR &&
12132 N->getOperand(0).getOpcode() != ISD::XOR &&
12133 N->getOperand(0).getOpcode() != ISD::SELECT &&
12134 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
12135 return SDValue();
12136
12137 SmallVector<SDValue, 4> Inputs;
12138 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
12139 SmallPtrSet<SDNode *, 16> Visited;
12140
12141 // Visit all inputs, collect all binary operations (and, or, xor and
12142 // select) that are all fed by truncations.
12143 while (!BinOps.empty()) {
12144 SDValue BinOp = BinOps.back();
12145 BinOps.pop_back();
12146
12147 if (!Visited.insert(BinOp.getNode()).second)
12148 continue;
12149
12150 PromOps.push_back(BinOp);
12151
12152 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
12153 // The condition of the select is not promoted.
12154 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
12155 continue;
12156 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
12157 continue;
12158
12159 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
12160 isa<ConstantSDNode>(BinOp.getOperand(i))) {
12161 Inputs.push_back(BinOp.getOperand(i));
12162 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
12163 BinOp.getOperand(i).getOpcode() == ISD::OR ||
12164 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
12165 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
12166 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
12167 BinOps.push_back(BinOp.getOperand(i));
12168 } else {
12169 // We have an input that is not a truncation or another binary
12170 // operation; we'll abort this transformation.
12171 return SDValue();
12172 }
12173 }
12174 }
12175
12176 // The operands of a select that must be truncated when the select is
12177 // promoted because the operand is actually part of the to-be-promoted set.
12178 DenseMap<SDNode *, EVT> SelectTruncOp[2];
12179
12180 // Make sure that this is a self-contained cluster of operations (which
12181 // is not quite the same thing as saying that everything has only one
12182 // use).
12183 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12184 if (isa<ConstantSDNode>(Inputs[i]))
12185 continue;
12186
12187 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
12188 UE = Inputs[i].getNode()->use_end();
12189 UI != UE; ++UI) {
12190 SDNode *User = *UI;
12191 if (User != N && !Visited.count(User))
12192 return SDValue();
12193
12194 // If we're going to promote the non-output-value operand(s) or SELECT or
12195 // SELECT_CC, record them for truncation.
12196 if (User->getOpcode() == ISD::SELECT) {
12197 if (User->getOperand(0) == Inputs[i])
12198 SelectTruncOp[0].insert(std::make_pair(User,
12199 User->getOperand(0).getValueType()));
12200 } else if (User->getOpcode() == ISD::SELECT_CC) {
12201 if (User->getOperand(0) == Inputs[i])
12202 SelectTruncOp[0].insert(std::make_pair(User,
12203 User->getOperand(0).getValueType()));
12204 if (User->getOperand(1) == Inputs[i])
12205 SelectTruncOp[1].insert(std::make_pair(User,
12206 User->getOperand(1).getValueType()));
12207 }
12208 }
12209 }
12210
12211 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
12212 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
12213 UE = PromOps[i].getNode()->use_end();
12214 UI != UE; ++UI) {
12215 SDNode *User = *UI;
12216 if (User != N && !Visited.count(User))
12217 return SDValue();
12218
12219 // If we're going to promote the non-output-value operand(s) or SELECT or
12220 // SELECT_CC, record them for truncation.
12221 if (User->getOpcode() == ISD::SELECT) {
12222 if (User->getOperand(0) == PromOps[i])
12223 SelectTruncOp[0].insert(std::make_pair(User,
12224 User->getOperand(0).getValueType()));
12225 } else if (User->getOpcode() == ISD::SELECT_CC) {
12226 if (User->getOperand(0) == PromOps[i])
12227 SelectTruncOp[0].insert(std::make_pair(User,
12228 User->getOperand(0).getValueType()));
12229 if (User->getOperand(1) == PromOps[i])
12230 SelectTruncOp[1].insert(std::make_pair(User,
12231 User->getOperand(1).getValueType()));
12232 }
12233 }
12234 }
12235
12236 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
12237 bool ReallyNeedsExt = false;
12238 if (N->getOpcode() != ISD::ANY_EXTEND) {
12239 // If all of the inputs are not already sign/zero extended, then
12240 // we'll still need to do that at the end.
12241 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12242 if (isa<ConstantSDNode>(Inputs[i]))
12243 continue;
12244
12245 unsigned OpBits =
12246 Inputs[i].getOperand(0).getValueSizeInBits();
12247 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
12248
12249 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
12250 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
12251 APInt::getHighBitsSet(OpBits,
12252 OpBits-PromBits))) ||
12253 (N->getOpcode() == ISD::SIGN_EXTEND &&
12254 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
12255 (OpBits-(PromBits-1)))) {
12256 ReallyNeedsExt = true;
12257 break;
12258 }
12259 }
12260 }
12261
12262 // Replace all inputs, either with the truncation operand, or a
12263 // truncation or extension to the final output type.
12264 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12265 // Constant inputs need to be replaced with the to-be-promoted nodes that
12266 // use them because they might have users outside of the cluster of
12267 // promoted nodes.
12268 if (isa<ConstantSDNode>(Inputs[i]))
12269 continue;
12270
12271 SDValue InSrc = Inputs[i].getOperand(0);
12272 if (Inputs[i].getValueType() == N->getValueType(0))
12273 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
12274 else if (N->getOpcode() == ISD::SIGN_EXTEND)
12275 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
12276 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
12277 else if (N->getOpcode() == ISD::ZERO_EXTEND)
12278 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
12279 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
12280 else
12281 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
12282 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
12283 }
12284
12285 std::list<HandleSDNode> PromOpHandles;
12286 for (auto &PromOp : PromOps)
12287 PromOpHandles.emplace_back(PromOp);
12288
12289 // Replace all operations (these are all the same, but have a different
12290 // (promoted) return type). DAG.getNode will validate that the types of
12291 // a binary operator match, so go through the list in reverse so that
12292 // we've likely promoted both operands first.
12293 while (!PromOpHandles.empty()) {
12294 SDValue PromOp = PromOpHandles.back().getValue();
12295 PromOpHandles.pop_back();
12296
12297 unsigned C;
12298 switch (PromOp.getOpcode()) {
12299 default: C = 0; break;
12300 case ISD::SELECT: C = 1; break;
12301 case ISD::SELECT_CC: C = 2; break;
12302 }
12303
12304 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
12305 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
12306 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
12307 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
12308 // The to-be-promoted operands of this node have not yet been
12309 // promoted (this should be rare because we're going through the
12310 // list backward, but if one of the operands has several users in
12311 // this cluster of to-be-promoted nodes, it is possible).
12312 PromOpHandles.emplace_front(PromOp);
12313 continue;
12314 }
12315
12316 // For SELECT and SELECT_CC nodes, we do a similar check for any
12317 // to-be-promoted comparison inputs.
12318 if (PromOp.getOpcode() == ISD::SELECT ||
12319 PromOp.getOpcode() == ISD::SELECT_CC) {
12320 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
12321 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
12322 (SelectTruncOp[1].count(PromOp.getNode()) &&
12323 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
12324 PromOpHandles.emplace_front(PromOp);
12325 continue;
12326 }
12327 }
12328
12329 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
12330 PromOp.getNode()->op_end());
12331
12332 // If this node has constant inputs, then they'll need to be promoted here.
12333 for (unsigned i = 0; i < 2; ++i) {
12334 if (!isa<ConstantSDNode>(Ops[C+i]))
12335 continue;
12336 if (Ops[C+i].getValueType() == N->getValueType(0))
12337 continue;
12338
12339 if (N->getOpcode() == ISD::SIGN_EXTEND)
12340 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
12341 else if (N->getOpcode() == ISD::ZERO_EXTEND)
12342 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
12343 else
12344 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
12345 }
12346
12347 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
12348 // truncate them again to the original value type.
12349 if (PromOp.getOpcode() == ISD::SELECT ||
12350 PromOp.getOpcode() == ISD::SELECT_CC) {
12351 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
12352 if (SI0 != SelectTruncOp[0].end())
12353 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
12354 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
12355 if (SI1 != SelectTruncOp[1].end())
12356 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
12357 }
12358
12359 DAG.ReplaceAllUsesOfValueWith(PromOp,
12360 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
12361 }
12362
12363 // Now we're left with the initial extension itself.
12364 if (!ReallyNeedsExt)
12365 return N->getOperand(0);
12366
12367 // To zero extend, just mask off everything except for the first bit (in the
12368 // i1 case).
12369 if (N->getOpcode() == ISD::ZERO_EXTEND)
12370 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
12371 DAG.getConstant(APInt::getLowBitsSet(
12372 N->getValueSizeInBits(0), PromBits),
12373 dl, N->getValueType(0)));
12374
12375 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
12376 "Invalid extension type");
12377 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
12378 SDValue ShiftCst =
12379 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
12380 return DAG.getNode(
12381 ISD::SRA, dl, N->getValueType(0),
12382 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
12383 ShiftCst);
12384}
12385
12386SDValue PPCTargetLowering::combineSetCC(SDNode *N,
12387 DAGCombinerInfo &DCI) const {
12388 assert(N->getOpcode() == ISD::SETCC &&
12389 "Should be called with a SETCC node");
12390
12391 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
12392 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
12393 SDValue LHS = N->getOperand(0);
12394 SDValue RHS = N->getOperand(1);
12395
12396 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
12397 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
12398 LHS.hasOneUse())
12399 std::swap(LHS, RHS);
12400
12401 // x == 0-y --> x+y == 0
12402 // x != 0-y --> x+y != 0
12403 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
12404 RHS.hasOneUse()) {
12405 SDLoc DL(N);
12406 SelectionDAG &DAG = DCI.DAG;
12407 EVT VT = N->getValueType(0);
12408 EVT OpVT = LHS.getValueType();
12409 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
12410 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
12411 }
12412 }
12413
12414 return DAGCombineTruncBoolExt(N, DCI);
12415}
12416
12417// Is this an extending load from an f32 to an f64?
12418static bool isFPExtLoad(SDValue Op) {
12419 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
12420 return LD->getExtensionType() == ISD::EXTLOAD &&
12421 Op.getValueType() == MVT::f64;
12422 return false;
12423}
12424
12425/// Reduces the number of fp-to-int conversion when building a vector.
12426///
12427/// If this vector is built out of floating to integer conversions,
12428/// transform it to a vector built out of floating point values followed by a
12429/// single floating to integer conversion of the vector.
12430/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
12431/// becomes (fptosi (build_vector ($A, $B, ...)))
12432SDValue PPCTargetLowering::
12433combineElementTruncationToVectorTruncation(SDNode *N,
12434 DAGCombinerInfo &DCI) const {
12435 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
12436 "Should be called with a BUILD_VECTOR node");
12437
12438 SelectionDAG &DAG = DCI.DAG;
12439 SDLoc dl(N);
12440
12441 SDValue FirstInput = N->getOperand(0);
12442 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
12443 "The input operand must be an fp-to-int conversion.");
12444
12445 // This combine happens after legalization so the fp_to_[su]i nodes are
12446 // already converted to PPCISD nodes.
12447 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
12448 if (FirstConversion == PPCISD::FCTIDZ ||
12449 FirstConversion == PPCISD::FCTIDUZ ||
12450 FirstConversion == PPCISD::FCTIWZ ||
12451 FirstConversion == PPCISD::FCTIWUZ) {
12452 bool IsSplat = true;
12453 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
12454 FirstConversion == PPCISD::FCTIWUZ;
12455 EVT SrcVT = FirstInput.getOperand(0).getValueType();
12456 SmallVector<SDValue, 4> Ops;
12457 EVT TargetVT = N->getValueType(0);
12458 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
12459 SDValue NextOp = N->getOperand(i);
12460 if (NextOp.getOpcode() != PPCISD::MFVSR)
12461 return SDValue();
12462 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
12463 if (NextConversion != FirstConversion)
12464 return SDValue();
12465 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
12466 // This is not valid if the input was originally double precision. It is
12467 // also not profitable to do unless this is an extending load, in which
12468 // case doing this combine will allow us to combine consecutive loads.
12469 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
12470 return SDValue();
12471 if (N->getOperand(i) != FirstInput)
12472 IsSplat = false;
12473 }
12474
12475 // If this is a splat, we leave it as-is since there will be only a single
12476 // fp-to-int conversion followed by a splat of the integer. This is better
12477 // for 32-bit and smaller ints and neutral for 64-bit ints.
12478 if (IsSplat)
12479 return SDValue();
12480
12481 // Now that we know we have the right type of node, get its operands
12482 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
12483 SDValue In = N->getOperand(i).getOperand(0);
12484 if (Is32Bit) {
12485 // For 32-bit values, we need to add an FP_ROUND node (if we made it
12486 // here, we know that all inputs are extending loads so this is safe).
12487 if (In.isUndef())
12488 Ops.push_back(DAG.getUNDEF(SrcVT));
12489 else {
12490 SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
12491 MVT::f32, In.getOperand(0),
12492 DAG.getIntPtrConstant(1, dl));
12493 Ops.push_back(Trunc);
12494 }
12495 } else
12496 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
12497 }
12498
12499 unsigned Opcode;
12500 if (FirstConversion == PPCISD::FCTIDZ ||
12501 FirstConversion == PPCISD::FCTIWZ)
12502 Opcode = ISD::FP_TO_SINT;
12503 else
12504 Opcode = ISD::FP_TO_UINT;
12505
12506 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
12507 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
12508 return DAG.getNode(Opcode, dl, TargetVT, BV);
12509 }
12510 return SDValue();
12511}
12512
12513/// Reduce the number of loads when building a vector.
12514///
12515/// Building a vector out of multiple loads can be converted to a load
12516/// of the vector type if the loads are consecutive. If the loads are
12517/// consecutive but in descending order, a shuffle is added at the end
12518/// to reorder the vector.
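/// For example, a build_vector whose operands load consecutive descending
/// addresses becomes a single vector load at the lowest address followed by
/// an element-reversing shuffle.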
12519static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
12520 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
12521 "Should be called with a BUILD_VECTOR node");
12522
12523 SDLoc dl(N);
12524
12525 // Return early for non-byte-sized types, as they can't be consecutive.
12526 if (!N->getValueType(0).getVectorElementType().isByteSized())
12527 return SDValue();
12528
12529 bool InputsAreConsecutiveLoads = true;
12530 bool InputsAreReverseConsecutive = true;
12531 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
12532 SDValue FirstInput = N->getOperand(0);
12533 bool IsRoundOfExtLoad = false;
12534
12535 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
12536 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
12537 LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
12538 IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
12539 }
12540 // Not a build vector of (possibly fp_rounded) loads.
12541 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
12542 N->getNumOperands() == 1)
12543 return SDValue();
12544
12545 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
12546 // If any inputs are fp_round(extload), they all must be.
12547 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
12548 return SDValue();
12549
12550 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
12551 N->getOperand(i);
12552 if (NextInput.getOpcode() != ISD::LOAD)
12553 return SDValue();
12554
12555 SDValue PreviousInput =
12556 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
12557 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
12558 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
12559
12560 // If any inputs are fp_round(extload), they all must be.
12561 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
12562 return SDValue();
12563
12564 if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
12565 InputsAreConsecutiveLoads = false;
12566 if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
12567 InputsAreReverseConsecutive = false;
12568
12569 // Exit early if the loads are neither consecutive nor reverse consecutive.
12570 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
12571 return SDValue();
12572 }
12573
12574 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
12575 "The loads cannot be both consecutive and reverse consecutive.");
12576
12577 SDValue FirstLoadOp =
12578 IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
12579 SDValue LastLoadOp =
12580 IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
12581 N->getOperand(N->getNumOperands()-1);
12582
12583 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
12584 LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
12585 if (InputsAreConsecutiveLoads) {
12586 assert(LD1 && "Input needs to be a LoadSDNode.");
12587 return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
12588 LD1->getBasePtr(), LD1->getPointerInfo(),
12589 LD1->getAlignment());
12590 }
12591 if (InputsAreReverseConsecutive) {
12592 assert(LDL && "Input needs to be a LoadSDNode.");
12593 SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
12594 LDL->getBasePtr(), LDL->getPointerInfo(),
12595 LDL->getAlignment());
12596 SmallVector<int, 16> Ops;
12597 for (int i = N->getNumOperands() - 1; i >= 0; i--)
12598 Ops.push_back(i);
12599
12600 return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
12601 DAG.getUNDEF(N->getValueType(0)), Ops);
12602 }
12603 return SDValue();
12604}
12605
12606// This function adds the vector_shuffle needed to get
12607// the elements of the vector extract in the correct position
12608// as specified by the CorrectElems encoding.
12609static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
12610 SDValue Input, uint64_t Elems,
12611 uint64_t CorrectElems) {
12612 SDLoc dl(N);
12613
12614 unsigned NumElems = Input.getValueType().getVectorNumElements();
12615 SmallVector<int, 16> ShuffleMask(NumElems, -1);
12616
12617 // Knowing the element indices being extracted from the original
12618 // vector and the order in which they're being inserted, just put
12619 // them at the element indices required for the instruction.
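 // For example (LE, byte->word with CorrectElems = 0x0004080C): extracts at
 // indices 1, 5, 9, 13 give Elems = 0x0105090D, so the loop below sets
 // ShuffleMask[12] = 13, [8] = 9, [4] = 5 and [0] = 1.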
12620 for (unsigned i = 0; i < N->getNumOperands(); i++) {
12621 if (DAG.getDataLayout().isLittleEndian())
12622 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
12623 else
12624 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
12625 CorrectElems = CorrectElems >> 8;
12626 Elems = Elems >> 8;
12627 }
12628
12629 SDValue Shuffle =
12630 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
12631 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
12632
12633 EVT Ty = N->getValueType(0);
12634 SDValue BV = DAG.getNode(PPCISD::SExtVElems, dl, Ty, Shuffle);
12635 return BV;
12636}
12637
12638// Look for build vector patterns where input operands come from sign
12639// extended vector_extract elements of specific indices. If the correct indices
12640// aren't used, add a vector shuffle to fix up the indices and create a new
12641// PPCISD::SExtVElems node, which selects the vector sign extend instructions
12642// during instruction selection.
12643static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
12644 // This array encodes the indices that the vector sign extend instructions
12645 // extract from when extending from one type to another for both BE and LE.
12646 // The right nibble of each byte corresponds to the LE indices,
12647 // and the left nibble of each byte corresponds to the BE indices.
12648 // For example: 0x3074B8FC byte->word
12649 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
12650 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
12651 // For example: 0x000070F8 byte->double word
12652 // For LE: the allowed indices are: 0x0,0x8
12653 // For BE: the allowed indices are: 0x7,0xF
12654 uint64_t TargetElems[] = {
12655 0x3074B8FC, // b->w
12656 0x000070F8, // b->d
12657 0x10325476, // h->w
12658 0x00003074, // h->d
12659 0x00001032, // w->d
12660 };
12661
12662 uint64_t Elems = 0;
12663 int Index;
12664 SDValue Input;
12665
12666 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
12667 if (!Op)
12668 return false;
12669 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
12670 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
12671 return false;
12672
12673 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
12674 // of the right width.
12675 SDValue Extract = Op.getOperand(0);
12676 if (Extract.getOpcode() == ISD::ANY_EXTEND)
12677 Extract = Extract.getOperand(0);
12678 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12679 return false;
12680
12681 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
12682 if (!ExtOp)
12683 return false;
12684
12685 Index = ExtOp->getZExtValue();
12686 if (Input && Input != Extract.getOperand(0))
12687 return false;
12688
12689 if (!Input)
12690 Input = Extract.getOperand(0);
12691
12692 Elems = Elems << 8;
12693 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
12694 Elems |= Index;
12695
12696 return true;
12697 };
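 // For example, on LE a build_vector of sign-extended extracts at indices
 // 0, 4, 8 and 12 accumulates Elems = 0x0004080C, which matches the masked
 // b->w entry below, so no fix-up shuffle is needed.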
12698
12699 // If the build vector operands aren't sign-extended vector extracts
12700 // of the same input vector, then return.
12701 for (unsigned i = 0; i < N->getNumOperands(); i++) {
12702 if (!isSExtOfVecExtract(N->getOperand(i))) {
12703 return SDValue();
12704 }
12705 }
12706
12707 // If the vector extract indices are not correct, add the appropriate
12708 // vector_shuffle.
12709 int TgtElemArrayIdx;
12710 int InputSize = Input.getValueType().getScalarSizeInBits();
12711 int OutputSize = N->getValueType(0).getScalarSizeInBits();
12712 if (InputSize + OutputSize == 40)
12713 TgtElemArrayIdx = 0;
12714 else if (InputSize + OutputSize == 72)
12715 TgtElemArrayIdx = 1;
12716 else if (InputSize + OutputSize == 48)
12717 TgtElemArrayIdx = 2;
12718 else if (InputSize + OutputSize == 80)
12719 TgtElemArrayIdx = 3;
12720 else if (InputSize + OutputSize == 96)
12721 TgtElemArrayIdx = 4;
12722 else
12723 return SDValue();
12724
12725 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
12726 CorrectElems = DAG.getDataLayout().isLittleEndian()
12727 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
12728 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
12729 if (Elems != CorrectElems) {
12730 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
12731 }
12732
12733 // Regular lowering will catch cases where a shuffle is not needed.
12734 return SDValue();
12735}
12736
12737SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
12738 DAGCombinerInfo &DCI) const {
12739 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
12740 "Should be called with a BUILD_VECTOR node");
12741
12742 SelectionDAG &DAG = DCI.DAG;
12743 SDLoc dl(N);
12744
12745 if (!Subtarget.hasVSX())
12746 return SDValue();
12747
12748 // The target independent DAG combiner will leave a build_vector of
12749 // float-to-int conversions intact. We can generate MUCH better code for
12750 // a float-to-int conversion of a vector of floats.
12751 SDValue FirstInput = N->getOperand(0);
12752 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
12753 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
12754 if (Reduced)
12755 return Reduced;
12756 }
12757
12758 // If we're building a vector out of consecutive loads, just load that
12759 // vector type.
12760 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
12761 if (Reduced)
12762 return Reduced;
12763
12764 // If we're building a vector out of extended elements from another vector
12765 // we have P9 vector integer extend instructions. The code assumes legal
12766 // input types (i.e. it can't handle things like v4i16) so do not run before
12767 // legalization.
12768 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
12769 Reduced = combineBVOfVecSExt(N, DAG);
12770 if (Reduced)
12771 return Reduced;
12772 }
12773
12774
12775 if (N->getValueType(0) != MVT::v2f64)
12776 return SDValue();
12777
12778 // Looking for:
12779 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
12780 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
12781 FirstInput.getOpcode() != ISD::UINT_TO_FP)
12782 return SDValue();
12783 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
12784 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
12785 return SDValue();
12786 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
12787 return SDValue();
12788
12789 SDValue Ext1 = FirstInput.getOperand(0);
12790 SDValue Ext2 = N->getOperand(1).getOperand(0);
 12791 if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
 12792 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
 12793 return SDValue();
12794
12795 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
12796 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
12797 if (!Ext1Op || !Ext2Op)
12798 return SDValue();
12799 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
12800 Ext1.getOperand(0) != Ext2.getOperand(0))
12801 return SDValue();
12802
12803 int FirstElem = Ext1Op->getZExtValue();
12804 int SecondElem = Ext2Op->getZExtValue();
12805 int SubvecIdx;
12806 if (FirstElem == 0 && SecondElem == 1)
12807 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
12808 else if (FirstElem == 2 && SecondElem == 3)
12809 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
12810 else
12811 return SDValue();
12812
12813 SDValue SrcVec = Ext1.getOperand(0);
 12814 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
 12815 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
 12816 return DAG.getNode(NodeType, dl, MVT::v2f64,
 12817 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
12818}
12819
12820SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
12821 DAGCombinerInfo &DCI) const {
12822 assert((N->getOpcode() == ISD::SINT_TO_FP ||
12823 N->getOpcode() == ISD::UINT_TO_FP) &&
12824 "Need an int -> FP conversion node here");
12825
12826 if (useSoftFloat() || !Subtarget.has64BitSupport())
12827 return SDValue();
12828
12829 SelectionDAG &DAG = DCI.DAG;
12830 SDLoc dl(N);
12831 SDValue Op(N, 0);
12832
12833 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
12834 // from the hardware.
12835 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
12836 return SDValue();
12837 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
12838 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
12839 return SDValue();
12840
12841 SDValue FirstOperand(Op.getOperand(0));
12842 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
12843 (FirstOperand.getValueType() == MVT::i8 ||
12844 FirstOperand.getValueType() == MVT::i16);
12845 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
12846 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
12847 bool DstDouble = Op.getValueType() == MVT::f64;
12848 unsigned ConvOp = Signed ?
12849 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
12850 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
12851 SDValue WidthConst =
12852 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
12853 dl, false);
12854 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
 12855 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
 12856 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
 12857 DAG.getVTList(MVT::f64, MVT::Other),
 12858 Ops, MVT::i8, LDN->getMemOperand());
12859
12860 // For signed conversion, we need to sign-extend the value in the VSR
12861 if (Signed) {
12862 SDValue ExtOps[] = { Ld, WidthConst };
12863 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
12864 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
12865 } else
12866 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
12867 }
12868
12869
12870 // For i32 intermediate values, unfortunately, the conversion functions
 12871 // leave the upper 32 bits of the value undefined. Within the set of
12872 // scalar instructions, we have no method for zero- or sign-extending the
12873 // value. Thus, we cannot handle i32 intermediate values here.
12874 if (Op.getOperand(0).getValueType() == MVT::i32)
12875 return SDValue();
12876
12877 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
12878 "UINT_TO_FP is supported only with FPCVT");
12879
12880 // If we have FCFIDS, then use it when converting to single-precision.
12881 // Otherwise, convert to double-precision and then round.
12882 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
 12883 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
 12884 : PPCISD::FCFIDS)
 12885 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
12886 : PPCISD::FCFID);
12887 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
12888 ? MVT::f32
12889 : MVT::f64;
12890
 12891 // If we're converting from a float to an int and back to a float again,
12892 // then we don't need the store/load pair at all.
12893 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
12894 Subtarget.hasFPCVT()) ||
12895 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
12896 SDValue Src = Op.getOperand(0).getOperand(0);
12897 if (Src.getValueType() == MVT::f32) {
12898 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
12899 DCI.AddToWorklist(Src.getNode());
12900 } else if (Src.getValueType() != MVT::f64) {
12901 // Make sure that we don't pick up a ppc_fp128 source value.
12902 return SDValue();
12903 }
12904
12905 unsigned FCTOp =
 12906 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
 12907 PPCISD::FCTIDUZ;
 12908
12909 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
12910 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
12911
12912 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
12913 FP = DAG.getNode(ISD::FP_ROUND, dl,
12914 MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
12915 DCI.AddToWorklist(FP.getNode());
12916 }
12917
12918 return FP;
12919 }
12920
12921 return SDValue();
12922}
12923
12924// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
 12925// builtins) into loads with swaps.
 12926SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
 12927 DAGCombinerInfo &DCI) const {
12928 SelectionDAG &DAG = DCI.DAG;
12929 SDLoc dl(N);
12930 SDValue Chain;
12931 SDValue Base;
12932 MachineMemOperand *MMO;
12933
12934 switch (N->getOpcode()) {
12935 default:
12936 llvm_unreachable("Unexpected opcode for little endian VSX load");
12937 case ISD::LOAD: {
12938 LoadSDNode *LD = cast<LoadSDNode>(N);
12939 Chain = LD->getChain();
12940 Base = LD->getBasePtr();
12941 MMO = LD->getMemOperand();
12942 // If the MMO suggests this isn't a load of a full vector, leave
12943 // things alone. For a built-in, we have to make the change for
12944 // correctness, so if there is a size problem that will be a bug.
12945 if (MMO->getSize() < 16)
12946 return SDValue();
12947 break;
 12948 }
 12949 case ISD::INTRINSIC_W_CHAIN: {
 12950 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
12951 Chain = Intrin->getChain();
12952 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
12953 // us what we want. Get operand 2 instead.
12954 Base = Intrin->getOperand(2);
12955 MMO = Intrin->getMemOperand();
12956 break;
12957 }
12958 }
12959
12960 MVT VecTy = N->getValueType(0).getSimpleVT();
12961
 12962 // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
 12963 // aligned and the type is a vector with elements up to 4 bytes.
 12964 if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment() % 16) &&
 12965 VecTy.getScalarSizeInBits() <= 32) {
12966 return SDValue();
12967 }
12968
 12969 SDValue LoadOps[] = { Chain, Base };
 12970 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
 12971 DAG.getVTList(MVT::v2f64, MVT::Other),
 12972 LoadOps, MVT::v2f64, MMO);
12973
12974 DCI.AddToWorklist(Load.getNode());
12975 Chain = Load.getValue(1);
12976 SDValue Swap = DAG.getNode(
12977 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
12978 DCI.AddToWorklist(Swap.getNode());
12979
12980 // Add a bitcast if the resulting load type doesn't match v2f64.
12981 if (VecTy != MVT::v2f64) {
12982 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
12983 DCI.AddToWorklist(N.getNode());
12984 // Package {bitcast value, swap's chain} to match Load's shape.
12985 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
12986 N, Swap.getValue(1));
12987 }
12988
12989 return Swap;
12990}
12991
12992// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
 12993// builtins) into stores with swaps.
 12994SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
 12995 DAGCombinerInfo &DCI) const {
12996 SelectionDAG &DAG = DCI.DAG;
12997 SDLoc dl(N);
12998 SDValue Chain;
12999 SDValue Base;
13000 unsigned SrcOpnd;
13001 MachineMemOperand *MMO;
13002
13003 switch (N->getOpcode()) {
13004 default:
13005 llvm_unreachable("Unexpected opcode for little endian VSX store");
13006 case ISD::STORE: {
13007 StoreSDNode *ST = cast<StoreSDNode>(N);
13008 Chain = ST->getChain();
13009 Base = ST->getBasePtr();
13010 MMO = ST->getMemOperand();
13011 SrcOpnd = 1;
13012 // If the MMO suggests this isn't a store of a full vector, leave
13013 // things alone. For a built-in, we have to make the change for
13014 // correctness, so if there is a size problem that will be a bug.
13015 if (MMO->getSize() < 16)
13016 return SDValue();
13017 break;
13018 }
13019 case ISD::INTRINSIC_VOID: {
13020 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
13021 Chain = Intrin->getChain();
13022 // Intrin->getBasePtr() oddly does not get what we want.
13023 Base = Intrin->getOperand(3);
13024 MMO = Intrin->getMemOperand();
13025 SrcOpnd = 2;
13026 break;
13027 }
13028 }
13029
13030 SDValue Src = N->getOperand(SrcOpnd);
13031 MVT VecTy = Src.getValueType().getSimpleVT();
13032
 13033 // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the store is
 13034 // aligned and the type is a vector with elements up to 4 bytes.
 13035 if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment() % 16) &&
 13036 VecTy.getScalarSizeInBits() <= 32) {
13037 return SDValue();
13038 }
13039
 13040 // All stores are done as v2f64, with a bitcast added when needed.
13041 if (VecTy != MVT::v2f64) {
13042 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
13043 DCI.AddToWorklist(Src.getNode());
13044 }
13045
13046 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
13047 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
13048 DCI.AddToWorklist(Swap.getNode());
13049 Chain = Swap.getValue(1);
 13050 SDValue StoreOps[] = { Chain, Swap, Base };
 13051 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
 13052 DAG.getVTList(MVT::Other),
 13053 StoreOps, VecTy, MMO);
13054 DCI.AddToWorklist(Store.getNode());
13055 return Store;
13056}
13057
13058// Handle DAG combine for STORE (FP_TO_INT F).
13059SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
13060 DAGCombinerInfo &DCI) const {
13061
13062 SelectionDAG &DAG = DCI.DAG;
13063 SDLoc dl(N);
13064 unsigned Opcode = N->getOperand(1).getOpcode();
13065
13066 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
13067 && "Not a FP_TO_INT Instruction!");
13068
13069 SDValue Val = N->getOperand(1).getOperand(0);
13070 EVT Op1VT = N->getOperand(1).getValueType();
13071 EVT ResVT = Val.getValueType();
13072
13073 // Floating point types smaller than 32 bits are not legal on Power.
13074 if (ResVT.getScalarSizeInBits() < 32)
13075 return SDValue();
13076
13077 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
13078 bool ValidTypeForStoreFltAsInt =
13079 (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
13080 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
13081
13082 if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Altivec() ||
13083 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
13084 return SDValue();
13085
13086 // Extend f32 values to f64
13087 if (ResVT.getScalarSizeInBits() == 32) {
13088 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
13089 DCI.AddToWorklist(Val.getNode());
13090 }
13091
13092 // Set signed or unsigned conversion opcode.
 13093 unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
 13094 PPCISD::FP_TO_SINT_IN_VSR :
 13095 PPCISD::FP_TO_UINT_IN_VSR;
 13096
13097 Val = DAG.getNode(ConvOpcode,
13098 dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
13099 DCI.AddToWorklist(Val.getNode());
13100
13101 // Set number of bytes being converted.
13102 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
13103 SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
13104 DAG.getIntPtrConstant(ByteSize, dl, false),
13105 DAG.getValueType(Op1VT) };
 13106
 13107 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
 13108 DAG.getVTList(MVT::Other), Ops,
13109 cast<StoreSDNode>(N)->getMemoryVT(),
13110 cast<StoreSDNode>(N)->getMemOperand());
13111
13112 DCI.AddToWorklist(Val.getNode());
13113 return Val;
13114}
13115
 13116SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
 13117 DAGCombinerInfo &DCI) const {
13118 SelectionDAG &DAG = DCI.DAG;
13119 SDLoc dl(N);
13120 switch (N->getOpcode()) {
13121 default: break;
13122 case ISD::ADD:
13123 return combineADD(N, DCI);
13124 case ISD::SHL:
13125 return combineSHL(N, DCI);
13126 case ISD::SRA:
13127 return combineSRA(N, DCI);
13128 case ISD::SRL:
13129 return combineSRL(N, DCI);
13130 case ISD::MUL:
13131 return combineMUL(N, DCI);
13132 case PPCISD::SHL:
13133 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
13134 return N->getOperand(0);
13135 break;
13136 case PPCISD::SRL:
13137 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
13138 return N->getOperand(0);
13139 break;
13140 case PPCISD::SRA:
13141 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
13142 if (C->isNullValue() || // 0 >>s V -> 0.
13143 C->isAllOnesValue()) // -1 >>s V -> -1.
13144 return N->getOperand(0);
13145 }
13146 break;
13147 case ISD::SIGN_EXTEND:
13148 case ISD::ZERO_EXTEND:
13149 case ISD::ANY_EXTEND:
13150 return DAGCombineExtBoolTrunc(N, DCI);
13151 case ISD::TRUNCATE:
13152 return combineTRUNCATE(N, DCI);
13153 case ISD::SETCC:
13154 if (SDValue CSCC = combineSetCC(N, DCI))
 13155 return CSCC;
 13156 LLVM_FALLTHROUGH;
 13157 case ISD::SELECT_CC:
13158 return DAGCombineTruncBoolExt(N, DCI);
13159 case ISD::SINT_TO_FP:
13160 case ISD::UINT_TO_FP:
13161 return combineFPToIntToFP(N, DCI);
13162 case ISD::STORE: {
13163
13164 EVT Op1VT = N->getOperand(1).getValueType();
13165 unsigned Opcode = N->getOperand(1).getOpcode();
13166
13167 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
 13168 SDValue Val = combineStoreFPToInt(N, DCI);
13169 if (Val)
13170 return Val;
13171 }
13172
13173 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
13174 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
13175 N->getOperand(1).getNode()->hasOneUse() &&
13176 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
13177 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
13178
 13179 // STBRX can only handle simple types and it makes no sense to store less
 13180 // than two bytes in byte-reversed order.
13181 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
13182 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
13183 break;
13184
13185 SDValue BSwapOp = N->getOperand(1).getOperand(0);
13186 // Do an any-extend to 32-bits if this is a half-word input.
13187 if (BSwapOp.getValueType() == MVT::i16)
13188 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
13189
 13190 // If the type of the BSWAP operand is wider than the stored memory width,
 13191 // it needs to be shifted right before STBRX.
13192 if (Op1VT.bitsGT(mVT)) {
13193 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
13194 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
13195 DAG.getConstant(Shift, dl, MVT::i32));
13196 // Need to truncate if this is a bswap of i64 stored as i32/i16.
13197 if (Op1VT == MVT::i64)
13198 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
13199 }
13200
13201 SDValue Ops[] = {
13202 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
13203 };
 13204 return
 13205 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
 13206 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
13207 cast<StoreSDNode>(N)->getMemOperand());
13208 }
13209
13210 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
 13211 // Doing so increases the chance of CSE'ing constant construction.
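 // For example, "store i32 0" and "store i64 0" in the same function can
 // then share one materialized zero in a 64-bit register.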
13212 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
13213 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
 13214 // Need to sign-extend to 64 bits to handle negative values.
13215 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
13216 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
13217 MemVT.getSizeInBits());
13218 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
13219
13220 // DAG.getTruncStore() can't be used here because it doesn't accept
13221 // the general (base + offset) addressing mode.
13222 // So we use UpdateNodeOperands and setTruncatingStore instead.
13223 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
13224 N->getOperand(3));
13225 cast<StoreSDNode>(N)->setTruncatingStore(true);
13226 return SDValue(N, 0);
13227 }
13228
13229 // For little endian, VSX stores require generating xxswapd/lxvd2x.
13230 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
13231 if (Op1VT.isSimple()) {
13232 MVT StoreVT = Op1VT.getSimpleVT();
13233 if (Subtarget.needsSwapsForVSXMemOps() &&
13234 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
13235 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
13236 return expandVSXStoreForLE(N, DCI);
13237 }
13238 break;
13239 }
13240 case ISD::LOAD: {
13241 LoadSDNode *LD = cast<LoadSDNode>(N);
13242 EVT VT = LD->getValueType(0);
13243
13244 // For little endian, VSX loads require generating lxvd2x/xxswapd.
13245 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
13246 if (VT.isSimple()) {
13247 MVT LoadVT = VT.getSimpleVT();
13248 if (Subtarget.needsSwapsForVSXMemOps() &&
13249 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
13250 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
13251 return expandVSXLoadForLE(N, DCI);
13252 }
13253
13254 // We sometimes end up with a 64-bit integer load, from which we extract
13255 // two single-precision floating-point numbers. This happens with
13256 // std::complex<float>, and other similar structures, because of the way we
13257 // canonicalize structure copies. However, if we lack direct moves,
13258 // then the final bitcasts from the extracted integer values to the
13259 // floating-point numbers turn into store/load pairs. Even with direct moves,
13260 // just loading the two floating-point numbers is likely better.
13261 auto ReplaceTwoFloatLoad = [&]() {
13262 if (VT != MVT::i64)
13263 return false;
13264
13265 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
13266 LD->isVolatile())
13267 return false;
13268
13269 // We're looking for a sequence like this:
13270 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
13271 // t16: i64 = srl t13, Constant:i32<32>
13272 // t17: i32 = truncate t16
13273 // t18: f32 = bitcast t17
13274 // t19: i32 = truncate t13
13275 // t20: f32 = bitcast t19
13276
13277 if (!LD->hasNUsesOfValue(2, 0))
13278 return false;
13279
13280 auto UI = LD->use_begin();
13281 while (UI.getUse().getResNo() != 0) ++UI;
13282 SDNode *Trunc = *UI++;
13283 while (UI.getUse().getResNo() != 0) ++UI;
13284 SDNode *RightShift = *UI;
13285 if (Trunc->getOpcode() != ISD::TRUNCATE)
13286 std::swap(Trunc, RightShift);
13287
13288 if (Trunc->getOpcode() != ISD::TRUNCATE ||
13289 Trunc->getValueType(0) != MVT::i32 ||
13290 !Trunc->hasOneUse())
13291 return false;
13292 if (RightShift->getOpcode() != ISD::SRL ||
13293 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
13294 RightShift->getConstantOperandVal(1) != 32 ||
13295 !RightShift->hasOneUse())
13296 return false;
13297
13298 SDNode *Trunc2 = *RightShift->use_begin();
13299 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
13300 Trunc2->getValueType(0) != MVT::i32 ||
13301 !Trunc2->hasOneUse())
13302 return false;
13303
13304 SDNode *Bitcast = *Trunc->use_begin();
13305 SDNode *Bitcast2 = *Trunc2->use_begin();
13306
13307 if (Bitcast->getOpcode() != ISD::BITCAST ||
13308 Bitcast->getValueType(0) != MVT::f32)
13309 return false;
13310 if (Bitcast2->getOpcode() != ISD::BITCAST ||
13311 Bitcast2->getValueType(0) != MVT::f32)
13312 return false;
13313
13314 if (Subtarget.isLittleEndian())
13315 std::swap(Bitcast, Bitcast2);
13316
13317 // Bitcast has the second float (in memory-layout order) and Bitcast2
13318 // has the first one.
13319
13320 SDValue BasePtr = LD->getBasePtr();
13321 if (LD->isIndexed()) {
13322 assert(LD->getAddressingMode() == ISD::PRE_INC &&
13323 "Non-pre-inc AM on PPC?");
13324 BasePtr =
13325 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
13326 LD->getOffset());
13327 }
13328
13329 auto MMOFlags =
13330 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
13331 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
13332 LD->getPointerInfo(), LD->getAlignment(),
13333 MMOFlags, LD->getAAInfo());
13334 SDValue AddPtr =
13335 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
13336 BasePtr, DAG.getIntPtrConstant(4, dl));
13337 SDValue FloatLoad2 = DAG.getLoad(
13338 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
13339 LD->getPointerInfo().getWithOffset(4),
13340 MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
13341
13342 if (LD->isIndexed()) {
13343 // Note that DAGCombine should re-form any pre-increment load(s) from
13344 // what is produced here if that makes sense.
13345 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
13346 }
13347
13348 DCI.CombineTo(Bitcast2, FloatLoad);
13349 DCI.CombineTo(Bitcast, FloatLoad2);
13350
13351 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
13352 SDValue(FloatLoad2.getNode(), 1));
13353 return true;
13354 };
13355
13356 if (ReplaceTwoFloatLoad())
13357 return SDValue(N, 0);
13358
13359 EVT MemVT = LD->getMemoryVT();
13360 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
13361 unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
13362 Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
13363 unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy);
13364 if (LD->isUnindexed() && VT.isVector() &&
13365 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
13366 // P8 and later hardware should just use LOAD.
13367 !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
13368 VT == MVT::v4i32 || VT == MVT::v4f32)) ||
13369 (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
13370 LD->getAlignment() >= ScalarABIAlignment)) &&
13371 LD->getAlignment() < ABIAlignment) {
13372 // This is a type-legal unaligned Altivec or QPX load.
13373 SDValue Chain = LD->getChain();
13374 SDValue Ptr = LD->getBasePtr();
13375 bool isLittleEndian = Subtarget.isLittleEndian();
13376
13377 // This implements the loading of unaligned vectors as described in
13378 // the venerable Apple Velocity Engine overview. Specifically:
13379 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
13380 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
13381 //
13382 // The general idea is to expand a sequence of one or more unaligned
13383 // loads into an alignment-based permutation-control instruction (lvsl
13384 // or lvsr), a series of regular vector loads (which always truncate
13385 // their input address to an aligned address), and a series of
13386 // permutations. The results of these permutations are the requested
13387 // loaded values. The trick is that the last "extra" load is not taken
13388 // from the address you might suspect (sizeof(vector) bytes after the
13389 // last requested load), but rather sizeof(vector) - 1 bytes after the
13390 // last requested vector. The point of this is to avoid a page fault if
13391 // the base address happened to be aligned. This works because if the
13392 // base address is aligned, then adding less than a full vector length
13393 // will cause the last vector in the sequence to be (re)loaded.
13394 // Otherwise, the next vector will be fetched as you might suspect was
13395 // necessary.
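 //
 // For example, when loading 16 bytes from 0x1007, lvx ignores the low four
 // address bits: the base load covers 0x1000-0x100F and the extra load,
 // taken at 0x1007 + 15 = 0x1016, covers 0x1010-0x101F. Had the base been
 // aligned at 0x1000, the extra load at 0x1000 + 15 = 0x100F would simply
 // re-read 0x1000-0x100F instead of touching (and possibly faulting on) the
 // next line.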
13396
13397 // We might be able to reuse the permutation generation from
13398 // a different base address offset from this one by an aligned amount.
13399 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
13400 // optimization later.
13401 Intrinsic::ID Intr, IntrLD, IntrPerm;
13402 MVT PermCntlTy, PermTy, LDTy;
13403 if (Subtarget.hasAltivec()) {
13404 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr :
13405 Intrinsic::ppc_altivec_lvsl;
13406 IntrLD = Intrinsic::ppc_altivec_lvx;
13407 IntrPerm = Intrinsic::ppc_altivec_vperm;
13408 PermCntlTy = MVT::v16i8;
13409 PermTy = MVT::v4i32;
13410 LDTy = MVT::v4i32;
13411 } else {
13412 Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
13413 Intrinsic::ppc_qpx_qvlpcls;
13414 IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
13415 Intrinsic::ppc_qpx_qvlfs;
13416 IntrPerm = Intrinsic::ppc_qpx_qvfperm;
13417 PermCntlTy = MVT::v4f64;
13418 PermTy = MVT::v4f64;
13419 LDTy = MemVT.getSimpleVT();
13420 }
13421
13422 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
13423
13424 // Create the new MMO for the new base load. It is like the original MMO,
13425 // but represents an area in memory almost twice the vector size centered
13426 // on the original address. If the address is unaligned, we might start
13427 // reading up to (sizeof(vector)-1) bytes below the address of the
13428 // original unaligned load.
 13429 MachineFunction &MF = DAG.getMachineFunction();
 13430 MachineMemOperand *BaseMMO =
13431 MF.getMachineMemOperand(LD->getMemOperand(),
13432 -(long)MemVT.getStoreSize()+1,
13433 2*MemVT.getStoreSize()-1);
13434
13435 // Create the new base load.
13436 SDValue LDXIntID =
13437 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
13438 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
 13439 SDValue BaseLoad =
 13440 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
13441 DAG.getVTList(PermTy, MVT::Other),
13442 BaseLoadOps, LDTy, BaseMMO);
13443
13444 // Note that the value of IncOffset (which is provided to the next
13445 // load's pointer info offset value, and thus used to calculate the
13446 // alignment), and the value of IncValue (which is actually used to
13447 // increment the pointer value) are different! This is because we
13448 // require the next load to appear to be aligned, even though it
13449 // is actually offset from the base pointer by a lesser amount.
13450 int IncOffset = VT.getSizeInBits() / 8;
13451 int IncValue = IncOffset;
13452
13453 // Walk (both up and down) the chain looking for another load at the real
13454 // (aligned) offset (the alignment of the other load does not matter in
13455 // this case). If found, then do not use the offset reduction trick, as
13456 // that will prevent the loads from being later combined (as they would
13457 // otherwise be duplicates).
13458 if (!findConsecutiveLoad(LD, DAG))
13459 --IncValue;
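 // For example, for a 16-byte vector with no consecutive load found,
 // IncOffset stays 16 (so the next load's pointer info still appears
 // aligned) while IncValue drops to 15, implementing the sizeof(vector)-1
 // trick described above.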
13460
13461 SDValue Increment =
13462 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
13463 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
13464
13465 MachineMemOperand *ExtraMMO =
13466 MF.getMachineMemOperand(LD->getMemOperand(),
13467 1, 2*MemVT.getStoreSize()-1);
13468 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
 13469 SDValue ExtraLoad =
 13470 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
13471 DAG.getVTList(PermTy, MVT::Other),
13472 ExtraLoadOps, LDTy, ExtraMMO);
 13473
 13474 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
 13475 BaseLoad.getValue(1), ExtraLoad.getValue(1));
13476
13477 // Because vperm has a big-endian bias, we must reverse the order
13478 // of the input vectors and complement the permute control vector
13479 // when generating little endian code. We have already handled the
13480 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
13481 // and ExtraLoad here.
13482 SDValue Perm;
13483 if (isLittleEndian)
13484 Perm = BuildIntrinsicOp(IntrPerm,
13485 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
13486 else
13487 Perm = BuildIntrinsicOp(IntrPerm,
13488 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
13489
13490 if (VT != PermTy)
13491 Perm = Subtarget.hasAltivec() ?
13492 DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
13493 DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
13494 DAG.getTargetConstant(1, dl, MVT::i64));
13495 // second argument is 1 because this rounding
13496 // is always exact.
13497
13498 // The output of the permutation is our loaded result, the TokenFactor is
13499 // our new chain.
13500 DCI.CombineTo(N, Perm, TF);
13501 return SDValue(N, 0);
13502 }
13503 }
13504 break;
 13505 case ISD::INTRINSIC_WO_CHAIN: {
 13506 bool isLittleEndian = Subtarget.isLittleEndian();
13507 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
13508 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
13509 : Intrinsic::ppc_altivec_lvsl);
13510 if ((IID == Intr ||
13511 IID == Intrinsic::ppc_qpx_qvlpcld ||
13512 IID == Intrinsic::ppc_qpx_qvlpcls) &&
13513 N->getOperand(1)->getOpcode() == ISD::ADD) {
13514 SDValue Add = N->getOperand(1);
13515
13516 int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
13517 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
13518
13519 if (DAG.MaskedValueIsZero(Add->getOperand(1),
13520 APInt::getAllOnesValue(Bits /* alignment */)
13521 .zext(Add.getScalarValueSizeInBits()))) {
13522 SDNode *BasePtr = Add->getOperand(0).getNode();
13523 for (SDNode::use_iterator UI = BasePtr->use_begin(),
13524 UE = BasePtr->use_end();
13525 UI != UE; ++UI) {
13526 if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
13527 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
13528 // We've found another LVSL/LVSR, and this address is an aligned
13529 // multiple of that one. The results will be the same, so use the
13530 // one we've just found instead.
13531
13532 return SDValue(*UI, 0);
13533 }
13534 }
13535 }
13536
13537 if (isa<ConstantSDNode>(Add->getOperand(1))) {
13538 SDNode *BasePtr = Add->getOperand(0).getNode();
13539 for (SDNode::use_iterator UI = BasePtr->use_begin(),
13540 UE = BasePtr->use_end(); UI != UE; ++UI) {
13541 if (UI->getOpcode() == ISD::ADD &&
13542 isa<ConstantSDNode>(UI->getOperand(1)) &&
13543 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
13544 cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
13545 (1ULL << Bits) == 0) {
13546 SDNode *OtherAdd = *UI;
13547 for (SDNode::use_iterator VI = OtherAdd->use_begin(),
13548 VE = OtherAdd->use_end(); VI != VE; ++VI) {
13549 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
13550 cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
13551 return SDValue(*VI, 0);
13552 }
13553 }
13554 }
13555 }
13556 }
13557 }
13558
 13559 // Combine vmaxsw/h/b(a, a's negation) into abs(a) to expose the
 13560 // vabsduw/h/b opportunity to downstream combines.
13561 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
13562 (IID == Intrinsic::ppc_altivec_vmaxsw ||
13563 IID == Intrinsic::ppc_altivec_vmaxsh ||
13564 IID == Intrinsic::ppc_altivec_vmaxsb)) {
13565 SDValue V1 = N->getOperand(1);
13566 SDValue V2 = N->getOperand(2);
13567 if ((V1.getSimpleValueType() == MVT::v4i32 ||
 13568 V1.getSimpleValueType() == MVT::v8i16 ||
 13569 V1.getSimpleValueType() == MVT::v16i8) &&
13570 V1.getSimpleValueType() == V2.getSimpleValueType()) {
13571 // (0-a, a)
13572 if (V1.getOpcode() == ISD::SUB &&
 13573 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
 13574 V1.getOperand(1) == V2) {
13575 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
13576 }
13577 // (a, 0-a)
13578 if (V2.getOpcode() == ISD::SUB &&
13579 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
13580 V2.getOperand(1) == V1) {
13581 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
13582 }
13583 // (x-y, y-x)
13584 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
13585 V1.getOperand(0) == V2.getOperand(1) &&
13586 V1.getOperand(1) == V2.getOperand(0)) {
13587 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
13588 }
13589 }
13590 }
13591 }
13592
13593 break;
 13594 case ISD::INTRINSIC_W_CHAIN:
 13595 // For little endian, VSX loads require generating lxvd2x/xxswapd.
13596 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
13597 if (Subtarget.needsSwapsForVSXMemOps()) {
13598 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
13599 default:
13600 break;
13601 case Intrinsic::ppc_vsx_lxvw4x:
13602 case Intrinsic::ppc_vsx_lxvd2x:
13603 return expandVSXLoadForLE(N, DCI);
13604 }
13605 }
13606 break;
 13607 case ISD::INTRINSIC_VOID:
 13608 // For little endian, VSX stores require generating xxswapd/stxvd2x.
13609 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
13610 if (Subtarget.needsSwapsForVSXMemOps()) {
13611 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
13612 default:
13613 break;
13614 case Intrinsic::ppc_vsx_stxvw4x:
13615 case Intrinsic::ppc_vsx_stxvd2x:
13616 return expandVSXStoreForLE(N, DCI);
13617 }
13618 }
13619 break;
13620 case ISD::BSWAP:
13621 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
13622 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
13623 N->getOperand(0).hasOneUse() &&
13624 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
13625 (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
13626 N->getValueType(0) == MVT::i64))) {
13627 SDValue Load = N->getOperand(0);
13628 LoadSDNode *LD = cast<LoadSDNode>(Load);
13629 // Create the byte-swapping load.
13630 SDValue Ops[] = {
13631 LD->getChain(), // Chain
13632 LD->getBasePtr(), // Ptr
13633 DAG.getValueType(N->getValueType(0)) // VT
13634 };
 13635 SDValue BSLoad =
 13636 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
 13637 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
 13638 MVT::i64 : MVT::i32, MVT::Other),
 13639 Ops, LD->getMemoryVT(), LD->getMemOperand());
13640
13641 // If this is an i16 load, insert the truncate.
13642 SDValue ResVal = BSLoad;
13643 if (N->getValueType(0) == MVT::i16)
13644 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
13645
13646 // First, combine the bswap away. This makes the value produced by the
13647 // load dead.
13648 DCI.CombineTo(N, ResVal);
13649
 13650 // Next, combine the load away; we give it a bogus result value but a real
 13651 // chain result. The result value is dead because the bswap is dead.
13652 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
13653
13654 // Return N so it doesn't get rechecked!
13655 return SDValue(N, 0);
13656 }
13657 break;
13658 case PPCISD::VCMP:
13659 // If a VCMPo node already exists with exactly the same operands as this
13660 // node, use its result instead of this node (VCMPo computes both a CR6 and
13661 // a normal output).
13662 //
13663 if (!N->getOperand(0).hasOneUse() &&
13664 !N->getOperand(1).hasOneUse() &&
13665 !N->getOperand(2).hasOneUse()) {
13666
13667 // Scan all of the users of the LHS, looking for VCMPo's that match.
13668 SDNode *VCMPoNode = nullptr;
13669
13670 SDNode *LHSN = N->getOperand(0).getNode();
13671 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
13672 UI != E; ++UI)
13673 if (UI->getOpcode() == PPCISD::VCMPo &&
13674 UI->getOperand(1) == N->getOperand(1) &&
13675 UI->getOperand(2) == N->getOperand(2) &&
13676 UI->getOperand(0) == N->getOperand(0)) {
13677 VCMPoNode = *UI;
13678 break;
13679 }
13680
 13681 // If there is no VCMPo node, or if the flag result of the VCMPo is
 13682 // unused, don't transform this.
13683 if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
13684 break;
13685
13686 // Look at the (necessarily single) use of the flag value. If it has a
13687 // chain, this transformation is more complex. Note that multiple things
13688 // could use the value result, which we should ignore.
13689 SDNode *FlagUser = nullptr;
13690 for (SDNode::use_iterator UI = VCMPoNode->use_begin();
13691 FlagUser == nullptr; ++UI) {
13692 assert(UI != VCMPoNode->use_end() && "Didn't find user!");
13693 SDNode *User = *UI;
13694 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
13695 if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
13696 FlagUser = User;
13697 break;
13698 }
13699 }
13700 }
13701
13702 // If the user is a MFOCRF instruction, we know this is safe.
13703 // Otherwise we give up for right now.
13704 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
13705 return SDValue(VCMPoNode, 0);
13706 }
13707 break;
13708 case ISD::BRCOND: {
13709 SDValue Cond = N->getOperand(1);
13710 SDValue Target = N->getOperand(2);
13711
13712 if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
13713 cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
13714 Intrinsic::loop_decrement) {
13715
13716 // We now need to make the intrinsic dead (it cannot be instruction
13717 // selected).
13718 DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
13719 assert(Cond.getNode()->hasOneUse() &&
13720 "Counter decrement has more than one use");
13721
13722 return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
13723 N->getOperand(0), Target);
13724 }
13725 }
13726 break;
13727 case ISD::BR_CC: {
13728 // If this is a branch on an altivec predicate comparison, lower this so
13729 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
13730 // lowering is done pre-legalize, because the legalizer lowers the predicate
13731 // compare down to code that is difficult to reassemble.
13732 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
13733 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
13734
13735 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
13736 // value. If so, pass-through the AND to get to the intrinsic.
13737 if (LHS.getOpcode() == ISD::AND &&
 13738 LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
 13739 cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
13740 Intrinsic::loop_decrement &&
13741 isa<ConstantSDNode>(LHS.getOperand(1)) &&
13742 !isNullConstant(LHS.getOperand(1)))
13743 LHS = LHS.getOperand(0);
13744
13745 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
13746 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
13747 Intrinsic::loop_decrement &&
13748 isa<ConstantSDNode>(RHS)) {
13749 assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
13750 "Counter decrement comparison is not EQ or NE");
13751
13752 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
13753 bool isBDNZ = (CC == ISD::SETEQ && Val) ||
13754 (CC == ISD::SETNE && !Val);
13755
13756 // We now need to make the intrinsic dead (it cannot be instruction
13757 // selected).
13758 DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
13759 assert(LHS.getNode()->hasOneUse() &&
13760 "Counter decrement has more than one use");
13761
13762 return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
13763 N->getOperand(0), N->getOperand(4));
13764 }
13765
13766 int CompareOpc;
13767 bool isDot;
13768
13769 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
13770 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
13771 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
13772 assert(isDot && "Can't compare against a vector result!");
13773
13774 // If this is a comparison against something other than 0/1, then we know
13775 // that the condition is never/always true.
13776 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
13777 if (Val != 0 && Val != 1) {
13778 if (CC == ISD::SETEQ) // Cond never true, remove branch.
13779 return N->getOperand(0);
13780 // Always !=, turn it into an unconditional branch.
13781 return DAG.getNode(ISD::BR, dl, MVT::Other,
13782 N->getOperand(0), N->getOperand(4));
13783 }
13784
13785 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
13786
13787 // Create the PPCISD altivec 'dot' comparison node.
13788 SDValue Ops[] = {
13789 LHS.getOperand(2), // LHS of compare
13790 LHS.getOperand(3), // RHS of compare
13791 DAG.getConstant(CompareOpc, dl, MVT::i32)
13792 };
13793 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
13794 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
13795
13796 // Unpack the result based on how the target uses it.
13797 PPC::Predicate CompOpc;
13798 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
13799 default: // Can't happen, don't crash on invalid number though.
13800 case 0: // Branch on the value of the EQ bit of CR6.
13801 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
13802 break;
13803 case 1: // Branch on the inverted value of the EQ bit of CR6.
13804 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
13805 break;
13806 case 2: // Branch on the value of the LT bit of CR6.
13807 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
13808 break;
13809 case 3: // Branch on the inverted value of the LT bit of CR6.
13810 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
13811 break;
13812 }
13813
13814 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
13815 DAG.getConstant(CompOpc, dl, MVT::i32),
13816 DAG.getRegister(PPC::CR6, MVT::i32),
13817 N->getOperand(4), CompNode.getValue(1));
13818 }
13819 break;
13820 }
13821 case ISD::BUILD_VECTOR:
13822 return DAGCombineBuildVector(N, DCI);
13823 case ISD::ABS:
13824 return combineABS(N, DCI);
13825 case ISD::VSELECT:
13826 return combineVSelect(N, DCI);
13827 }
13828
13829 return SDValue();
13830}
13831
13832SDValue
 13833PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
 13834 SelectionDAG &DAG,
13835 SmallVectorImpl<SDNode *> &Created) const {
13836 // fold (sdiv X, pow2)
13837 EVT VT = N->getValueType(0);
13838 if (VT == MVT::i64 && !Subtarget.isPPC64())
13839 return SDValue();
13840 if ((VT != MVT::i32 && VT != MVT::i64) ||
13841 !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
13842 return SDValue();
13843
13844 SDLoc DL(N);
13845 SDValue N0 = N->getOperand(0);
13846
13847 bool IsNegPow2 = (-Divisor).isPowerOf2();
13848 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
13849 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
13850
13851 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
13852 Created.push_back(Op.getNode());
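 // PPCISD::SRA_ADDZE pairs the arithmetic shift (sra[wd]i) with addze: the
 // shift produces a floored quotient and sets the carry iff a negative value
 // shifted out non-zero bits, and addze then rounds toward zero. For
 // example, -7 / 4: (-7 >> 2) = -2 with carry 1, and -2 + 1 = -1, which is
 // trunc(-7/4) as C signed division requires.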
13853
13854 if (IsNegPow2) {
13855 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
13856 Created.push_back(Op.getNode());
13857 }
13858
13859 return Op;
13860}
13861
13862//===----------------------------------------------------------------------===//
13863// Inline Assembly Support
13864//===----------------------------------------------------------------------===//
13865
 13866void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
 13867 KnownBits &Known,
13868 const APInt &DemandedElts,
13869 const SelectionDAG &DAG,
13870 unsigned Depth) const {
13871 Known.resetAll();
13872 switch (Op.getOpcode()) {
13873 default: break;
13874 case PPCISD::LBRX: {
13875 // lhbrx is known to have the top bits cleared out.
13876 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
13877 Known.Zero = 0xFFFF0000;
13878 break;
13879 }
 13880 case ISD::INTRINSIC_WO_CHAIN: {
 13881 switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
13882 default: break;
13883 case Intrinsic::ppc_altivec_vcmpbfp_p:
13884 case Intrinsic::ppc_altivec_vcmpeqfp_p:
13885 case Intrinsic::ppc_altivec_vcmpequb_p:
13886 case Intrinsic::ppc_altivec_vcmpequh_p:
13887 case Intrinsic::ppc_altivec_vcmpequw_p:
13888 case Intrinsic::ppc_altivec_vcmpequd_p:
13889 case Intrinsic::ppc_altivec_vcmpgefp_p:
13890 case Intrinsic::ppc_altivec_vcmpgtfp_p:
13891 case Intrinsic::ppc_altivec_vcmpgtsb_p:
13892 case Intrinsic::ppc_altivec_vcmpgtsh_p:
13893 case Intrinsic::ppc_altivec_vcmpgtsw_p:
13894 case Intrinsic::ppc_altivec_vcmpgtsd_p:
13895 case Intrinsic::ppc_altivec_vcmpgtub_p:
13896 case Intrinsic::ppc_altivec_vcmpgtuh_p:
13897 case Intrinsic::ppc_altivec_vcmpgtuw_p:
13898 case Intrinsic::ppc_altivec_vcmpgtud_p:
13899 Known.Zero = ~1U; // All bits but the low one are known to be zero.
13900 break;
13901 }
13902 }
13903 }
13904}
13905
 13906unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
 13907 switch (Subtarget.getDarwinDirective()) {
13908 default: break;
13909 case PPC::DIR_970:
13910 case PPC::DIR_PWR4:
13911 case PPC::DIR_PWR5:
13912 case PPC::DIR_PWR5X:
13913 case PPC::DIR_PWR6:
13914 case PPC::DIR_PWR6X:
13915 case PPC::DIR_PWR7:
13916 case PPC::DIR_PWR8:
13917 case PPC::DIR_PWR9: {
13918 if (!ML)
13919 break;
13920
 13921 if (!DisableInnermostLoopAlign32) {
 13922 // If the nested loop is an innermost loop, prefer a 32-byte alignment,
13923 // so that we can decrease cache misses and branch-prediction misses.
13924 // Actual alignment of the loop will depend on the hotness check and other
13925 // logic in alignBlocks.
13926 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
13927 return 5;
13928 }
13929
13930 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
13931
13932 // For small loops (between 5 and 8 instructions), align to a 32-byte
13933 // boundary so that the entire loop fits in one instruction-cache line.
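 // (The value returned is the log2 of the alignment, so 5 means 2^5 = 32
 // bytes; at 4 bytes per instruction, 17-32 bytes of code is 5-8
 // instructions.)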
13934 uint64_t LoopSize = 0;
13935 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
13936 for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
13937 LoopSize += TII->getInstSizeInBytes(*J);
13938 if (LoopSize > 32)
13939 break;
13940 }
13941
13942 if (LoopSize > 16 && LoopSize <= 32)
13943 return 5;
13944
13945 break;
13946 }
13947 }
13948
 13949 return TargetLowering::getPrefLoopAlignment(ML);
 13950}
13951
13952/// getConstraintType - Given a constraint, return the type of
13953/// constraint it is for this target.
 13954PPCTargetLowering::ConstraintType
 13955PPCTargetLowering::getConstraintType(StringRef Constraint) const {
 13956 if (Constraint.size() == 1) {
13957 switch (Constraint[0]) {
13958 default: break;
13959 case 'b':
13960 case 'r':
13961 case 'f':
13962 case 'd':
13963 case 'v':
13964 case 'y':
13965 return C_RegisterClass;
13966 case 'Z':
13967 // FIXME: While Z does indicate a memory constraint, it specifically
13968 // indicates an r+r address (used in conjunction with the 'y' modifier
13969 // in the replacement string). Currently, we're forcing the base
13970 // register to be r0 in the asm printer (which is interpreted as zero)
13971 // and forming the complete address in the second register. This is
13972 // suboptimal.
13973 return C_Memory;
13974 }
13975 } else if (Constraint == "wc") { // individual CR bits.
13976 return C_RegisterClass;
13977 } else if (Constraint == "wa" || Constraint == "wd" ||
13978 Constraint == "wf" || Constraint == "ws" ||
13979 Constraint == "wi" || Constraint == "ww") {
13980 return C_RegisterClass; // VSX registers.
13981 }
13982 return TargetLowering::getConstraintType(Constraint);
13983}
13984
13985/// Examine constraint type and operand type and determine a weight value.
13986/// This object must already have been set up with the operand type
13987/// and the current alternative constraint selected.
 13988TargetLowering::ConstraintWeight
 13989PPCTargetLowering::getSingleConstraintMatchWeight(
 13990 AsmOperandInfo &info, const char *constraint) const {
 13991 ConstraintWeight weight = CW_Invalid;
 13992 Value *CallOperandVal = info.CallOperandVal;
13993 // If we don't have a value, we can't do a match,
13994 // but allow it at the lowest weight.
13995 if (!CallOperandVal)
13996 return CW_Default;
13997 Type *type = CallOperandVal->getType();
13998
13999 // Look at the constraint type.
14000 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
14001 return CW_Register; // an individual CR bit.
14002 else if ((StringRef(constraint) == "wa" ||
14003 StringRef(constraint) == "wd" ||
14004 StringRef(constraint) == "wf") &&
14005 type->isVectorTy())
14006 return CW_Register;
14007 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
 14008 return CW_Register; // just holds 64-bit integer data.
14009 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
14010 return CW_Register;
14011 else if (StringRef(constraint) == "ww" && type->isFloatTy())
14012 return CW_Register;
14013
14014 switch (*constraint) {
14015 default:
 14016 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
 14017 break;
14018 case 'b':
14019 if (type->isIntegerTy())
14020 weight = CW_Register;
14021 break;
14022 case 'f':
14023 if (type->isFloatTy())
14024 weight = CW_Register;
14025 break;
14026 case 'd':
14027 if (type->isDoubleTy())
14028 weight = CW_Register;
14029 break;
14030 case 'v':
14031 if (type->isVectorTy())
14032 weight = CW_Register;
14033 break;
14034 case 'y':
14035 weight = CW_Register;
14036 break;
14037 case 'Z':
14038 weight = CW_Memory;
14039 break;
14040 }
14041 return weight;
14042}
14043
14044std::pair<unsigned, const TargetRegisterClass *>
 14045PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
 14046 StringRef Constraint,
14047 MVT VT) const {
14048 if (Constraint.size() == 1) {
14049 // GCC RS6000 Constraint Letters
14050 switch (Constraint[0]) {
14051 case 'b': // R1-R31
14052 if (VT == MVT::i64 && Subtarget.isPPC64())
14053 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
14054 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
14055 case 'r': // R0-R31
14056 if (VT == MVT::i64 && Subtarget.isPPC64())
14057 return std::make_pair(0U, &PPC::G8RCRegClass);
14058 return std::make_pair(0U, &PPC::GPRCRegClass);
14059 // 'd' and 'f' constraints are both defined to be "the floating point
14060 // registers", where one is for 32-bit and the other for 64-bit. We don't
14061 // really care overly much here so just give them all the same reg classes.
14062 case 'd':
14063 case 'f':
14064 if (Subtarget.hasSPE()) {
14065 if (VT == MVT::f32 || VT == MVT::i32)
14066 return std::make_pair(0U, &PPC::SPE4RCRegClass);
14067 if (VT == MVT::f64 || VT == MVT::i64)
14068 return std::make_pair(0U, &PPC::SPERCRegClass);
14069 } else {
14070 if (VT == MVT::f32 || VT == MVT::i32)
14071 return std::make_pair(0U, &PPC::F4RCRegClass);
14072 if (VT == MVT::f64 || VT == MVT::i64)
14073 return std::make_pair(0U, &PPC::F8RCRegClass);
14074 if (VT == MVT::v4f64 && Subtarget.hasQPX())
14075 return std::make_pair(0U, &PPC::QFRCRegClass);
14076 if (VT == MVT::v4f32 && Subtarget.hasQPX())
14077 return std::make_pair(0U, &PPC::QSRCRegClass);
14078 }
14079 break;
14080 case 'v':
14081 if (VT == MVT::v4f64 && Subtarget.hasQPX())
14082 return std::make_pair(0U, &PPC::QFRCRegClass);
14083 if (VT == MVT::v4f32 && Subtarget.hasQPX())
14084 return std::make_pair(0U, &PPC::QSRCRegClass);
14085 if (Subtarget.hasAltivec())
14086 return std::make_pair(0U, &PPC::VRRCRegClass);
14087 break;
14088 case 'y': // crrc
14089 return std::make_pair(0U, &PPC::CRRCRegClass);
14090 }
14091 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
14092 // An individual CR bit.
14093 return std::make_pair(0U, &PPC::CRBITRCRegClass);
14094 } else if ((Constraint == "wa" || Constraint == "wd" ||
14095 Constraint == "wf" || Constraint == "wi") &&
14096 Subtarget.hasVSX()) {
14097 return std::make_pair(0U, &PPC::VSRCRegClass);
14098 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
14099 if (VT == MVT::f32 && Subtarget.hasP8Vector())
14100 return std::make_pair(0U, &PPC::VSSRCRegClass);
14101 else
14102 return std::make_pair(0U, &PPC::VSFRCRegClass);
14103 }
14104
14105 std::pair<unsigned, const TargetRegisterClass *> R =
 14106 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
 14107
14108 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
14109 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
14110 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
14111 // register.
14112 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
14113 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
14114 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
14115 PPC::GPRCRegClass.contains(R.first))
14116 return std::make_pair(TRI->getMatchingSuperReg(R.first,
14117 PPC::sub_32, &PPC::G8RCRegClass),
14118 &PPC::G8RCRegClass);
14119
14120 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
14121 if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
14122 R.first = PPC::CR0;
14123 R.second = &PPC::CRRCRegClass;
14124 }
14125
14126 return R;
14127}
14128
14129/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
14130/// vector. If it is invalid, don't add anything to Ops.
 14131void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
 14132 std::string &Constraint,
14133 std::vector<SDValue>&Ops,
14134 SelectionDAG &DAG) const {
14135 SDValue Result;
14136
14137 // Only support length 1 constraints.
14138 if (Constraint.length() > 1) return;
14139
14140 char Letter = Constraint[0];
14141 switch (Letter) {
14142 default: break;
14143 case 'I':
14144 case 'J':
14145 case 'K':
14146 case 'L':
14147 case 'M':
14148 case 'N':
14149 case 'O':
14150 case 'P': {
14151 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
14152 if (!CST) return; // Must be an immediate to match.
14153 SDLoc dl(Op);
14154 int64_t Value = CST->getSExtValue();
14155 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
14156 // numbers are printed as such.
14157 switch (Letter) {
14158 default: llvm_unreachable("Unknown constraint letter!");
14159 case 'I': // "I" is a signed 16-bit constant.
14160 if (isInt<16>(Value))
14161 Result = DAG.getTargetConstant(Value, dl, TCVT);
14162 break;
14163 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
14164 if (isShiftedUInt<16, 16>(Value))
14165 Result = DAG.getTargetConstant(Value, dl, TCVT);
14166 break;
14167 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
14168 if (isShiftedInt<16, 16>(Value))
14169 Result = DAG.getTargetConstant(Value, dl, TCVT);
14170 break;
14171 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
14172 if (isUInt<16>(Value))
14173 Result = DAG.getTargetConstant(Value, dl, TCVT);
14174 break;
14175 case 'M': // "M" is a constant that is greater than 31.
14176 if (Value > 31)
14177 Result = DAG.getTargetConstant(Value, dl, TCVT);
14178 break;
14179 case 'N': // "N" is a positive constant that is an exact power of two.
14180 if (Value > 0 && isPowerOf2_64(Value))
14181 Result = DAG.getTargetConstant(Value, dl, TCVT);
14182 break;
14183 case 'O': // "O" is the constant zero.
14184 if (Value == 0)
14185 Result = DAG.getTargetConstant(Value, dl, TCVT);
14186 break;
14187 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
14188 if (isInt<16>(-Value))
14189 Result = DAG.getTargetConstant(Value, dl, TCVT);
14190 break;
14191 }
14192 break;
14193 }
14194 }
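 // For example, asm("addi %0,%1,%2" : "=r"(X) : "r"(Y), "I"(16)) satisfies
 // the 'I' constraint because 16 fits in a signed 16-bit immediate.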
14195
14196 if (Result.getNode()) {
14197 Ops.push_back(Result);
14198 return;
14199 }
14200
14201 // Handle standard constraint letters.
14202 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
14203}
14204
14205// isLegalAddressingMode - Return true if the addressing mode represented
14206// by AM is legal for this target, for a load/store of the specified type.
 14207bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
 14208 const AddrMode &AM, Type *Ty,
14209 unsigned AS, Instruction *I) const {
14210 // PPC does not allow r+i addressing modes for vectors!
14211 if (Ty->isVectorTy() && AM.BaseOffs != 0)
14212 return false;
14213
14214 // PPC allows a sign-extended 16-bit immediate field.
14215 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
14216 return false;
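 // For example, a D-form access such as lwz r3, -32768(r1) encodes its
 // offset directly, while a larger offset must be placed in a register and
 // used as an indexed (X-form) r+r access.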
14217
14218 // No global is ever allowed as a base.
14219 if (AM.BaseGV)
14220 return false;
14221
 14222 // PPC only supports r+r addressing.
14223 switch (AM.Scale) {
14224 case 0: // "r+i" or just "i", depending on HasBaseReg.
14225 break;
14226 case 1:
14227 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
14228 return false;
14229 // Otherwise we have r+r or r+i.
14230 break;
14231 case 2:
14232 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
14233 return false;
14234 // Allow 2*r as r+r.
14235 break;
14236 default:
14237 // No other scales are supported.
14238 return false;
14239 }
14240
14241 return true;
14242}
14243
14244SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
14245 SelectionDAG &DAG) const {
 14246 MachineFunction &MF = DAG.getMachineFunction();
 14247 MachineFrameInfo &MFI = MF.getFrameInfo();
14248 MFI.setReturnAddressIsTaken(true);
14249
 14250 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
 14251 return SDValue();
14252
14253 SDLoc dl(Op);
14254 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
14255
14256 // Make sure the function does not optimize away the store of the RA to
14257 // the stack.
14258 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
14259 FuncInfo->setLRStoreRequired();
14260 bool isPPC64 = Subtarget.isPPC64();
14261 auto PtrVT = getPointerTy(MF.getDataLayout());
14262
14263 if (Depth > 0) {
14264 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
14265 SDValue Offset =
14266 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
14267 isPPC64 ? MVT::i64 : MVT::i32);
14268 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
14269 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
 14270 MachinePointerInfo());
 14271 }
14272
14273 // Just load the return address off the stack.
14274 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
14275 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
 14276 MachinePointerInfo());
 14277}
14278
14279SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
14280 SelectionDAG &DAG) const {
14281 SDLoc dl(Op);
14282 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
14283
 14284 MachineFunction &MF = DAG.getMachineFunction();
 14285 MachineFrameInfo &MFI = MF.getFrameInfo();
14286 MFI.setFrameAddressIsTaken(true);
14287
14288 EVT PtrVT = getPointerTy(MF.getDataLayout());
14289 bool isPPC64 = PtrVT == MVT::i64;
14290
14291 // Naked functions never have a frame pointer, and so we use r1. For all
14292 // other functions, this decision must be delayed until during PEI.
14293 unsigned FrameReg;
14294 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
14295 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
14296 else
14297 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
14298
14299 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
14300 PtrVT);
14301 while (Depth--)
14302 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
14303 FrameAddr, MachinePointerInfo());
14304 return FrameAddr;
14305}
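The Depth loop above walks the saved back chain once per frame. A rough host-level analogy, assuming (as the lowering does) that each frame stores its caller's frame pointer at offset 0:

#include <cstdio>

// Host-level analogy of the Depth loop: each dereference follows one saved
// back-chain pointer, exactly like the chained loads emitted above.
static void *frameAddressSketch(void *FP, unsigned Depth) {
  while (Depth--)
    FP = *static_cast<void **>(FP);
  return FP;
}

int main() {
  void *FP0 = __builtin_frame_address(0); // GCC/Clang builtin for frame 0
  printf("frame 0: %p\n", frameAddressSketch(FP0, 0));
}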
14306
14307// FIXME? Maybe this could be a TableGen attribute on some registers and
14308// this table could be generated automatically from RegInfo.
14309unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
14310 SelectionDAG &DAG) const {
14311 bool isPPC64 = Subtarget.isPPC64();
14312 bool isDarwinABI = Subtarget.isDarwinABI();
14313
14314 if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
14315 (!isPPC64 && VT != MVT::i32))
14316 report_fatal_error("Invalid register global variable type");
14317
14318 bool is64Bit = isPPC64 && VT == MVT::i64;
14319 unsigned Reg = StringSwitch<unsigned>(RegName)
14320 .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
14321 .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
14322 .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
14323 (is64Bit ? PPC::X13 : PPC::R13))
14324 .Default(0);
14325
14326 if (Reg)
14327 return Reg;
14328 report_fatal_error("Invalid register name global variable");
14329}
14330
14331bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
14332 // 32-bit SVR4 ABI accesses everything as got-indirect.
14333 if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
14334 return true;
14335
14336 CodeModel::Model CModel = getTargetMachine().getCodeModel();
14337 // If it is small or large code model, module locals are accessed
14338 // indirectly by loading their address from .toc/.got. The difference
14339 // is that for large code model we have ADDISTocHa + LDtocL and for
14340 // small code model we simply have LDtoc.
14341 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
14342 return true;
14343
14344 // JumpTable and BlockAddress are accessed as got-indirect.
14345 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
14346 return true;
14347
14348 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
14349 const GlobalValue *GV = G->getGlobal();
14350 unsigned char GVFlags = Subtarget.classifyGlobalReference(GV);
14351 // The NLP flag indicates that a global access has to use an
14352 // extra indirection.
14353 if (GVFlags & PPCII::MO_NLP_FLAG)
14354 return true;
14355 }
14356
14357 return false;
14358}
14359
14360bool
14361PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
14362 // The PowerPC target isn't yet aware of offsets.
14363 return false;
14364}
14365
14366bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
14367 const CallInst &I,
14368 MachineFunction &MF,
14369 unsigned Intrinsic) const {
14370 switch (Intrinsic) {
14371 case Intrinsic::ppc_qpx_qvlfd:
14372 case Intrinsic::ppc_qpx_qvlfs:
14373 case Intrinsic::ppc_qpx_qvlfcd:
14374 case Intrinsic::ppc_qpx_qvlfcs:
14375 case Intrinsic::ppc_qpx_qvlfiwa:
14376 case Intrinsic::ppc_qpx_qvlfiwz:
14377 case Intrinsic::ppc_altivec_lvx:
14378 case Intrinsic::ppc_altivec_lvxl:
14379 case Intrinsic::ppc_altivec_lvebx:
14380 case Intrinsic::ppc_altivec_lvehx:
14381 case Intrinsic::ppc_altivec_lvewx:
14382 case Intrinsic::ppc_vsx_lxvd2x:
14383 case Intrinsic::ppc_vsx_lxvw4x: {
14384 EVT VT;
14385 switch (Intrinsic) {
14386 case Intrinsic::ppc_altivec_lvebx:
14387 VT = MVT::i8;
14388 break;
14389 case Intrinsic::ppc_altivec_lvehx:
14390 VT = MVT::i16;
14391 break;
14392 case Intrinsic::ppc_altivec_lvewx:
14393 VT = MVT::i32;
14394 break;
14395 case Intrinsic::ppc_vsx_lxvd2x:
14396 VT = MVT::v2f64;
14397 break;
14398 case Intrinsic::ppc_qpx_qvlfd:
14399 VT = MVT::v4f64;
14400 break;
14401 case Intrinsic::ppc_qpx_qvlfs:
14402 VT = MVT::v4f32;
14403 break;
14404 case Intrinsic::ppc_qpx_qvlfcd:
14405 VT = MVT::v2f64;
14406 break;
14407 case Intrinsic::ppc_qpx_qvlfcs:
14408 VT = MVT::v2f32;
14409 break;
14410 default:
14411 VT = MVT::v4i32;
14412 break;
14413 }
14414
14415 Info.opc = ISD::INTRINSIC_W_CHAIN;
14416 Info.memVT = VT;
14417 Info.ptrVal = I.getArgOperand(0);
14418 Info.offset = -VT.getStoreSize()+1;
14419 Info.size = 2*VT.getStoreSize()-1;
14420 Info.align = 1;
14421 Info.flags = MachineMemOperand::MOLoad;
14422 return true;
14423 }
14424 case Intrinsic::ppc_qpx_qvlfda:
14425 case Intrinsic::ppc_qpx_qvlfsa:
14426 case Intrinsic::ppc_qpx_qvlfcda:
14427 case Intrinsic::ppc_qpx_qvlfcsa:
14428 case Intrinsic::ppc_qpx_qvlfiwaa:
14429 case Intrinsic::ppc_qpx_qvlfiwza: {
14430 EVT VT;
14431 switch (Intrinsic) {
14432 case Intrinsic::ppc_qpx_qvlfda:
14433 VT = MVT::v4f64;
14434 break;
14435 case Intrinsic::ppc_qpx_qvlfsa:
14436 VT = MVT::v4f32;
14437 break;
14438 case Intrinsic::ppc_qpx_qvlfcda:
14439 VT = MVT::v2f64;
14440 break;
14441 case Intrinsic::ppc_qpx_qvlfcsa:
14442 VT = MVT::v2f32;
14443 break;
14444 default:
14445 VT = MVT::v4i32;
14446 break;
14447 }
14448
14449 Info.opc = ISD::INTRINSIC_W_CHAIN;
14450 Info.memVT = VT;
14451 Info.ptrVal = I.getArgOperand(0);
14452 Info.offset = 0;
14453 Info.size = VT.getStoreSize();
14454 Info.align = 1;
14455 Info.flags = MachineMemOperand::MOLoad;
14456 return true;
14457 }
14458 case Intrinsic::ppc_qpx_qvstfd:
14459 case Intrinsic::ppc_qpx_qvstfs:
14460 case Intrinsic::ppc_qpx_qvstfcd:
14461 case Intrinsic::ppc_qpx_qvstfcs:
14462 case Intrinsic::ppc_qpx_qvstfiw:
14463 case Intrinsic::ppc_altivec_stvx:
14464 case Intrinsic::ppc_altivec_stvxl:
14465 case Intrinsic::ppc_altivec_stvebx:
14466 case Intrinsic::ppc_altivec_stvehx:
14467 case Intrinsic::ppc_altivec_stvewx:
14468 case Intrinsic::ppc_vsx_stxvd2x:
14469 case Intrinsic::ppc_vsx_stxvw4x: {
14470 EVT VT;
14471 switch (Intrinsic) {
14472 case Intrinsic::ppc_altivec_stvebx:
14473 VT = MVT::i8;
14474 break;
14475 case Intrinsic::ppc_altivec_stvehx:
14476 VT = MVT::i16;
14477 break;
14478 case Intrinsic::ppc_altivec_stvewx:
14479 VT = MVT::i32;
14480 break;
14481 case Intrinsic::ppc_vsx_stxvd2x:
14482 VT = MVT::v2f64;
14483 break;
14484 case Intrinsic::ppc_qpx_qvstfd:
14485 VT = MVT::v4f64;
14486 break;
14487 case Intrinsic::ppc_qpx_qvstfs:
14488 VT = MVT::v4f32;
14489 break;
14490 case Intrinsic::ppc_qpx_qvstfcd:
14491 VT = MVT::v2f64;
14492 break;
14493 case Intrinsic::ppc_qpx_qvstfcs:
14494 VT = MVT::v2f32;
14495 break;
14496 default:
14497 VT = MVT::v4i32;
14498 break;
14499 }
14500
14501 Info.opc = ISD::INTRINSIC_VOID;
14502 Info.memVT = VT;
14503 Info.ptrVal = I.getArgOperand(1);
14504 Info.offset = -VT.getStoreSize()+1;
14505 Info.size = 2*VT.getStoreSize()-1;
14506 Info.align = 1;
14507 Info.flags = MachineMemOperand::MOStore;
14508 return true;
14509 }
14510 case Intrinsic::ppc_qpx_qvstfda:
14511 case Intrinsic::ppc_qpx_qvstfsa:
14512 case Intrinsic::ppc_qpx_qvstfcda:
14513 case Intrinsic::ppc_qpx_qvstfcsa:
14514 case Intrinsic::ppc_qpx_qvstfiwa: {
14515 EVT VT;
14516 switch (Intrinsic) {
14517 case Intrinsic::ppc_qpx_qvstfda:
14518 VT = MVT::v4f64;
14519 break;
14520 case Intrinsic::ppc_qpx_qvstfsa:
14521 VT = MVT::v4f32;
14522 break;
14523 case Intrinsic::ppc_qpx_qvstfcda:
14524 VT = MVT::v2f64;
14525 break;
14526 case Intrinsic::ppc_qpx_qvstfcsa:
14527 VT = MVT::v2f32;
14528 break;
14529 default:
14530 VT = MVT::v4i32;
14531 break;
14532 }
14533
14534 Info.opc = ISD::INTRINSIC_VOID;
14535 Info.memVT = VT;
14536 Info.ptrVal = I.getArgOperand(1);
14537 Info.offset = 0;
14538 Info.size = VT.getStoreSize();
14539 Info.align = 1;
14540 Info.flags = MachineMemOperand::MOStore;
14541 return true;
14542 }
14543 default:
14544 break;
14545 }
14546
14547 return false;
14548}
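The offset/size pairs above describe a conservative byte window: lvx-class instructions ignore the low bits of the address, so a 16-byte access may touch any byte in [Ptr-15, Ptr+15]. A small sketch of that arithmetic (plain host code, just illustrating the numbers):

#include <cstdint>
#include <cstdio>

// For an S-byte vector access whose low address bits are ignored, every byte
// that can be touched lies in [Ptr-(S-1), Ptr+(S-1)]: offset -(S-1), size 2*S-1.
int main() {
  const int64_t S = 16; // store size of v4i32, as used above
  int64_t Offset = -S + 1;
  int64_t Size = 2 * S - 1;
  printf("offset %lld, size %lld -> bytes [Ptr%lld, Ptr+%lld]\n",
         (long long)Offset, (long long)Size, (long long)Offset,
         (long long)(Offset + Size - 1));
}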
14549
14550/// getOptimalMemOpType - Returns the target specific optimal type for load
14551/// and store operations as a result of memset, memcpy, and memmove
14552/// lowering. If DstAlign is zero, the destination alignment can satisfy any
14553/// constraint. Similarly, if SrcAlign is zero, there is no need to check it
14554/// against an alignment requirement, probably because the source does not
14555/// need to be loaded. If 'IsMemset' is
14556/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
14557/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
14558/// source is constant so it does not need to be loaded.
14559/// It returns EVT::Other if the type should be determined using generic
14560/// target-independent logic.
14561EVT PPCTargetLowering::getOptimalMemOpType(
14562 uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
14563 bool ZeroMemset, bool MemcpyStrSrc,
14564 const AttributeList &FuncAttributes) const {
14565 if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
14566 // When expanding a memset, require at least two QPX instructions to cover
14567 // the cost of loading the value to be stored from the constant pool.
14568 if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
14569 (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
14570 !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
14571 return MVT::v4f64;
14572 }
14573
14574 // We should use Altivec/VSX loads and stores when available. For unaligned
14575 // addresses, unaligned VSX loads are only fast starting with the P8.
14576 if (Subtarget.hasAltivec() && Size >= 16 &&
14577 (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
14578 ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
14579 return MVT::v4i32;
14580 }
14581
14582 if (Subtarget.isPPC64()) {
14583 return MVT::i64;
14584 }
14585
14586 return MVT::i32;
14587}
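A condensed restatement of that selection as a standalone function. This mirror is hypothetical and also omits the NoImplicitFloat attribute check the real hook performs:

#include <cstdint>
#include <cstdio>

enum class MemOpTy { V4F64, V4I32, I64, I32 };

// Hypothetical mirror of the decision above: widest profitable type first.
static MemOpTy pickMemOpType(bool Opt, bool QPX, bool Altivec, bool VSX,
                             bool P8Vector, bool IsMemset, uint64_t Size,
                             unsigned SrcAlign, unsigned DstAlign,
                             bool IsPPC64) {
  auto OkAlign = [](unsigned A, unsigned N) { return !A || A >= N; };
  if (Opt) {
    if (QPX && Size >= 32 && (!IsMemset || Size >= 64) &&
        OkAlign(SrcAlign, 32) && OkAlign(DstAlign, 32))
      return MemOpTy::V4F64;                      // 32-byte QPX copies
    if (Altivec && Size >= 16 &&
        ((OkAlign(SrcAlign, 16) && OkAlign(DstAlign, 16)) ||
         ((IsMemset && VSX) || P8Vector)))
      return MemOpTy::V4I32;                      // 16-byte vector copies
  }
  return IsPPC64 ? MemOpTy::I64 : MemOpTy::I32;   // scalar GPR copies
}

int main() {
  // A 64-byte memcpy with unconstrained alignment (0) on a P8-like target.
  bool IsVec = pickMemOpType(true, false, true, true, true, false, 64, 0, 0,
                             true) == MemOpTy::V4I32;
  printf("P8 64-byte memcpy uses v4i32: %d\n", IsVec);
}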
14588
14589/// Returns true if it is beneficial to convert a load of a constant
14590/// to just the constant itself.
14591bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
14592 Type *Ty) const {
14593 assert(Ty->isIntegerTy());
14594
14595 unsigned BitSize = Ty->getPrimitiveSizeInBits();
14596 return !(BitSize == 0 || BitSize > 64);
14597}
14598
14599bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
14600 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
14601 return false;
14602 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
14603 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
14604 return NumBits1 == 64 && NumBits2 == 32;
14605}
14606
14607bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
14608 if (!VT1.isInteger() || !VT2.isInteger())
14609 return false;
14610 unsigned NumBits1 = VT1.getSizeInBits();
14611 unsigned NumBits2 = VT2.getSizeInBits();
14612 return NumBits1 == 64 && NumBits2 == 32;
14613}
14614
14615bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
14616 // Generally speaking, zexts are not free, but they are free when they can be
14617 // folded with other operations.
14618 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
14619 EVT MemVT = LD->getMemoryVT();
14620 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
14621 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
14622 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
14623 LD->getExtensionType() == ISD::ZEXTLOAD))
14624 return true;
14625 }
14626
14627 // FIXME: Add other cases...
14628 // - 32-bit shifts with a zext to i64
14629 // - zext after ctlz, bswap, etc.
14630 // - zext after and by a constant mask
14631
14632 return TargetLowering::isZExtFree(Val, VT2);
14633}
14634
14635bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
14636 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
14637 "invalid fpext types");
14638 // Extending to float128 is not free.
14639 if (DestVT == MVT::f128)
14640 return false;
14641 return true;
14642}
14643
14644bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
14645 return isInt<16>(Imm) || isUInt<16>(Imm);
14646}
14647
14648bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
14649 return isInt<16>(Imm) || isUInt<16>(Imm);
14650}
14651
14652bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
14653 unsigned,
14654 unsigned,
14655 MachineMemOperand::Flags,
14656 bool *Fast) const {
14657 if (DisablePPCUnaligned)
14658 return false;
14659
14660 // PowerPC supports unaligned memory access for simple non-vector types.
14661 // Although accessing unaligned addresses is not as efficient as accessing
14662 // aligned addresses, it is generally more efficient than manual expansion,
14663 // and generally only traps for software emulation when crossing page
14664 // boundaries.
14665
14666 if (!VT.isSimple())
14667 return false;
14668
14669 if (VT.getSimpleVT().isVector()) {
14670 if (Subtarget.hasVSX()) {
14671 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
14672 VT != MVT::v4f32 && VT != MVT::v4i32)
14673 return false;
14674 } else {
14675 return false;
14676 }
14677 }
14678
14679 if (VT == MVT::ppcf128)
14680 return false;
14681
14682 if (Fast)
14683 *Fast = true;
14684
14685 return true;
14686}
14687
14688bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
14689 VT = VT.getScalarType();
14690
14691 if (!VT.isSimple())
14692 return false;
14693
14694 switch (VT.getSimpleVT().SimpleTy) {
14695 case MVT::f32:
14696 case MVT::f64:
14697 return true;
14698 case MVT::f128:
14699 return (EnableQuadPrecision && Subtarget.hasP9Vector());
14700 default:
14701 break;
14702 }
14703
14704 return false;
14705}
14706
14707const MCPhysReg *
14708PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
14709 // LR is a callee-save register, but we must treat it as clobbered by any call
14710 // site. Hence we include LR in the scratch registers, which are in turn added
14711 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
14712 // to CTR, which is used by any indirect call.
14713 static const MCPhysReg ScratchRegs[] = {
14714 PPC::X12, PPC::LR8, PPC::CTR8, 0
14715 };
14716
14717 return ScratchRegs;
14718}
14719
14720unsigned PPCTargetLowering::getExceptionPointerRegister(
14721 const Constant *PersonalityFn) const {
14722 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
14723}
14724
14725unsigned PPCTargetLowering::getExceptionSelectorRegister(
14726 const Constant *PersonalityFn) const {
14727 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
14728}
14729
14730bool
14731PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
14732 EVT VT, unsigned DefinedValues) const {
14733 if (VT == MVT::v2i64)
14734 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
14735
14736 if (Subtarget.hasVSX() || Subtarget.hasQPX())
14737 return true;
14738
14739 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
14740}
14741
14742Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
14743 if (DisableILPPref || Subtarget.enableMachineScheduler())
14744 return TargetLowering::getSchedulingPreference(N);
14745
14746 return Sched::ILP;
14747}
14748
14749// Create a fast isel object.
14750FastISel *
14751PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
14752 const TargetLibraryInfo *LibInfo) const {
14753 return PPC::createFastISel(FuncInfo, LibInfo);
14754}
14755
14756void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
14757 if (Subtarget.isDarwinABI()) return;
14758 if (!Subtarget.isPPC64()) return;
14759
14760 // Update IsSplitCSR in PPCFunctionInfo
14761 PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
14762 PFI->setIsSplitCSR(true);
14763}
14764
14765void PPCTargetLowering::insertCopiesSplitCSR(
14766 MachineBasicBlock *Entry,
14767 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
14768 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
14769 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
14770 if (!IStart)
14771 return;
14772
14773 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
14774 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
14775 MachineBasicBlock::iterator MBBI = Entry->begin();
14776 for (const MCPhysReg *I = IStart; *I; ++I) {
14777 const TargetRegisterClass *RC = nullptr;
14778 if (PPC::G8RCRegClass.contains(*I))
14779 RC = &PPC::G8RCRegClass;
14780 else if (PPC::F8RCRegClass.contains(*I))
14781 RC = &PPC::F8RCRegClass;
14782 else if (PPC::CRRCRegClass.contains(*I))
14783 RC = &PPC::CRRCRegClass;
14784 else if (PPC::VRRCRegClass.contains(*I))
14785 RC = &PPC::VRRCRegClass;
14786 else
14787 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
14788
14789 unsigned NewVR = MRI->createVirtualRegister(RC);
14790 // Create copy from CSR to a virtual register.
14791 // FIXME: this currently does not emit CFI pseudo-instructions, it works
14792 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
14793 // nounwind. If we want to generalize this later, we may need to emit
14794 // CFI pseudo-instructions.
14795 assert(Entry->getParent()->getFunction().hasFnAttribute(
14796 Attribute::NoUnwind) &&
14797 "Function should be nounwind in insertCopiesSplitCSR!");
14798 Entry->addLiveIn(*I);
14799 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
14800 .addReg(*I);
14801
14802 // Insert the copy-back instructions right before the terminator.
14803 for (auto *Exit : Exits)
14804 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
14805 TII->get(TargetOpcode::COPY), *I)
14806 .addReg(NewVR);
14807 }
14808}
14809
14810// Override to enable LOAD_STACK_GUARD lowering on Linux.
14811bool PPCTargetLowering::useLoadStackGuardNode() const {
14812 if (!Subtarget.isTargetLinux())
14813 return TargetLowering::useLoadStackGuardNode();
14814 return true;
14815}
14816
14817// Override to disable global variable loading on Linux.
14818void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
14819 if (!Subtarget.isTargetLinux())
14820 return TargetLowering::insertSSPDeclarations(M);
14821}
14822
14823bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
14824 bool ForCodeSize) const {
14825 if (!VT.isSimple() || !Subtarget.hasVSX())
14826 return false;
14827
14828 switch(VT.getSimpleVT().SimpleTy) {
14829 default:
14830 // For FP types that are currently not supported by PPC backend, return
14831 // false. Examples: f16, f80.
14832 return false;
14833 case MVT::f32:
14834 case MVT::f64:
14835 case MVT::ppcf128:
14836 return Imm.isPosZero();
14837 }
14838}
14839
14840// For vector shift operation op, fold
14841// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
14842static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
14843 SelectionDAG &DAG) {
14844 SDValue N0 = N->getOperand(0);
14845 SDValue N1 = N->getOperand(1);
14846 EVT VT = N0.getValueType();
14847 unsigned OpSizeInBits = VT.getScalarSizeInBits();
14848 unsigned Opcode = N->getOpcode();
14849 unsigned TargetOpcode;
14850
14851 switch (Opcode) {
14852 default:
14853 llvm_unreachable("Unexpected shift operation");
14854 case ISD::SHL:
14855 TargetOpcode = PPCISD::SHL;
14856 break;
14857 case ISD::SRL:
14858 TargetOpcode = PPCISD::SRL;
14859 break;
14860 case ISD::SRA:
14861 TargetOpcode = PPCISD::SRA;
14862 break;
14863 }
14864
14865 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
14866 N1->getOpcode() == ISD::AND)
14867 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
14868 if (Mask->getZExtValue() == OpSizeInBits - 1)
14869 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
14870
14871 return SDValue();
14872}
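Why the (and y, numbits-1) above can be dropped: PPC's vector shift instructions already interpret the shift amount modulo the element width, so masking with OpSizeInBits-1 changes nothing. A scalar host-side demonstration for a 32-bit element:

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t X = 0x12345678;
  for (uint32_t Y = 0; Y < 64; ++Y) {
    uint32_t Masked = X << (Y & 31);   // what the IR computes after the AND
    uint32_t Modulo = X << (Y % 32);   // the modulo semantics of the hardware
    if (Masked != Modulo) { printf("mismatch at %u\n", Y); return 1; }
  }
  printf("masking by 31 matches modulo-32 shifts\n");
}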
14873
14874SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
14875 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
14876 return Value;
14877
14878 SDValue N0 = N->getOperand(0);
14879 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
14880 if (!Subtarget.isISA3_0() ||
14881 N0.getOpcode() != ISD::SIGN_EXTEND ||
14882 N0.getOperand(0).getValueType() != MVT::i32 ||
14883 CN1 == nullptr || N->getValueType(0) != MVT::i64)
14884 return SDValue();
14885
14886 // We can't save an operation here if the value is already extended, and
14887 // the existing shift is easier to combine.
14888 SDValue ExtsSrc = N0.getOperand(0);
14889 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
14890 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
14891 return SDValue();
14892
14893 SDLoc DL(N0);
14894 SDValue ShiftBy = SDValue(CN1, 0);
14895 // We want the shift amount to be i32 on the extswli, but the shift
14896 // amount could be an i64.
14897 if (ShiftBy.getValueType() == MVT::i64)
14898 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
14899
14900 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
14901 ShiftBy);
14902}
14903
14904SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
14905 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
14906 return Value;
14907
14908 return SDValue();
14909}
14910
14911SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
14912 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
14913 return Value;
14914
14915 return SDValue();
14916}
14917
14918// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
14919// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
14920// When C is zero, the expression (addi Z, -C) simplifies to Z.
14921// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
14922static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
14923 const PPCSubtarget &Subtarget) {
14924 if (!Subtarget.isPPC64())
14925 return SDValue();
14926
14927 SDValue LHS = N->getOperand(0);
14928 SDValue RHS = N->getOperand(1);
14929
14930 auto isZextOfCompareWithConstant = [](SDValue Op) {
14931 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
14932 Op.getValueType() != MVT::i64)
14933 return false;
14934
14935 SDValue Cmp = Op.getOperand(0);
14936 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
14937 Cmp.getOperand(0).getValueType() != MVT::i64)
14938 return false;
14939
14940 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
14941 int64_t NegConstant = 0 - Constant->getSExtValue();
14942 // Due to the limitations of the addi instruction,
14943 // -C is required to be [-32768, 32767].
14944 return isInt<16>(NegConstant);
14945 }
14946
14947 return false;
14948 };
14949
14950 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
14951 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
14952
14953 // If there is a pattern, canonicalize a zext operand to the RHS.
14954 if (LHSHasPattern && !RHSHasPattern)
14955 std::swap(LHS, RHS);
14956 else if (!LHSHasPattern && !RHSHasPattern)
14957 return SDValue();
14958
14959 SDLoc DL(N);
14960 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
14961 SDValue Cmp = RHS.getOperand(0);
14962 SDValue Z = Cmp.getOperand(0);
14963 auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
14964
14965 assert(Constant && "Constant Should not be a null pointer.");
14966 int64_t NegConstant = 0 - Constant->getSExtValue();
14967
14968 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
14969 default: break;
14970 case ISD::SETNE: {
14971 // when C == 0
14972 // --> addze X, (addic Z, -1).carry
14973 // /
14974 // add X, (zext(setne Z, C))--
14975 // \ when -32768 <= -C <= 32767 && C != 0
14976 // --> addze X, (addic (addi Z, -C), -1).carry
14977 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
14978 DAG.getConstant(NegConstant, DL, MVT::i64));
14979 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
14980 SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
14981 AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
14982 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
14983 SDValue(Addc.getNode(), 1));
14984 }
14985 case ISD::SETEQ: {
14986 // when C == 0
14987 // --> addze X, (subfic Z, 0).carry
14988 // /
14989 // add X, (zext(sete Z, C))--
14990 // \ when -32768 <= -C <= 32767 && C != 0
14991 // --> addze X, (subfic (addi Z, -C), 0).carry
14992 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
14993 DAG.getConstant(NegConstant, DL, MVT::i64));
14994 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
14995 SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
14996 DAG.getConstant(0, DL, MVT::i64), AddOrZ);
14997 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
14998 SDValue(Subc.getNode(), 1));
14999 }
15000 }
15001
15002 return SDValue();
15003}
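The transform above rests on two carry identities. A host-side check (the 128-bit intermediate uses the GCC/Clang __int128 extension, purely for verification):

#include <cstdint>
#include <cstdio>

// Carry identities behind the combine above:
//   carry-out of (a + (-1)) == (a != 0)   // addic ..., -1 feeding addze
//   carry-out of (0 - a)    == (a == 0)   // subfic ..., 0 feeding addze
int main() {
  for (uint64_t A : {0ull, 1ull, 42ull, ~0ull}) {
    unsigned CarryAddm1 = (A + ~(uint64_t)0) < A;           // wrap = carry out
    unsigned CarrySub = ((unsigned __int128)~A + 1) >> 64;  // 0-A = ~A+1
    if (CarryAddm1 != (A != 0) || CarrySub != (A == 0)) {
      printf("identity fails for %llu\n", (unsigned long long)A);
      return 1;
    }
  }
  printf("carry identities hold\n");
}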
15004
15005SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
15006 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
15007 return Value;
15008
15009 return SDValue();
15010}
15011
15012// Detect TRUNCATE operations on bitcasts of float128 values.
15013// What we are looking for here is the situation where we extract a subset
15014// of bits from a 128-bit float.
15015// This can take one of two forms:
15016// 1) BITCAST of f128 feeding TRUNCATE
15017// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
15018// The reason this is required is because we do not have a legal i128 type
15019// and so we want to prevent having to store the f128 and then reload part
15020// of it.
15021SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
15022 DAGCombinerInfo &DCI) const {
15023 // If we are using CRBits then try that first.
15024 if (Subtarget.useCRBits()) {
15025 // Check if CRBits did anything and return that if it did.
15026 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
15027 return CRTruncValue;
15028 }
15029
15030 SDLoc dl(N);
15031 SDValue Op0 = N->getOperand(0);
15032
15033 // Looking for a truncate of i128 to i64.
15034 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
15035 return SDValue();
15036
15037 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
15038
15039 // SRL feeding TRUNCATE.
15040 if (Op0.getOpcode() == ISD::SRL) {
15041 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
15042 // The right shift has to be by 64 bits.
15043 if (!ConstNode || ConstNode->getZExtValue() != 64)
15044 return SDValue();
15045
15046 // Switch the element number to extract.
15047 EltToExtract = EltToExtract ? 0 : 1;
15048 // Update Op0 past the SRL.
15049 Op0 = Op0.getOperand(0);
15050 }
15051
15052 // BITCAST feeding a TRUNCATE possibly via SRL.
15053 if (Op0.getOpcode() == ISD::BITCAST &&
15054 Op0.getValueType() == MVT::i128 &&
15055 Op0.getOperand(0).getValueType() == MVT::f128) {
15056 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
15057 return DCI.DAG.getNode(
15058 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
15059 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
15060 }
15061 return SDValue();
15062}
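A host-side picture of the index arithmetic in the combine above: the extracted v2i64 element is the low 64 bits of the i128 (element 1 on big-endian, 0 on little-endian), and an srl by 64 selects the other half. This helper is illustrative only:

#include <cstdio>

static int eltToExtract(bool BigEndian, bool ShiftedBy64) {
  int Elt = BigEndian ? 1 : 0;     // low 64 bits of the i128
  if (ShiftedBy64) Elt ^= 1;       // srl ..., 64 selects the other half
  return Elt;
}

int main() {
  printf("LE, no shift -> element %d\n", eltToExtract(false, false)); // 0
  printf("LE, srl 64   -> element %d\n", eltToExtract(false, true));  // 1
}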
15063
15064SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
15065 SelectionDAG &DAG = DCI.DAG;
15066
15067 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
15068 if (!ConstOpOrElement)
15069 return SDValue();
15070
15071 // An imul is usually smaller than the alternative sequence for a legal type.
15072 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
15073 isOperationLegal(ISD::MUL, N->getValueType(0)))
15074 return SDValue();
15075
15076 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
15077 switch (this->Subtarget.getDarwinDirective()) {
15078 default:
15079 // TODO: enhance the condition for subtarget before pwr8
15080 return false;
15081 case PPC::DIR_PWR8:
15082 // type mul add shl
15083 // scalar 4 1 1
15084 // vector 7 2 2
15085 return true;
15086 case PPC::DIR_PWR9:
15087 // type mul add shl
15088 // scalar 5 2 2
15089 // vector 7 2 2
15090
15091 // The cycle counts of the related operations are shown in the table above.
15092 // Because mul costs 5 (scalar) / 7 (vector) cycles while add/sub/shl all
15093 // cost 2 for both scalar and vector types, the 2-instruction patterns
15094 // (add/sub + shl, 4 cycles) are always profitable; but the 3-instruction
15095 // pattern (mul x, -(2^N + 1)) => -(add (shl x, N), x) costs 6 cycles,
15096 // so we should only use it for vector types.
15097 return IsAddOne && IsNeg ? VT.isVector() : true;
15098 }
15099 };
15100
15101 EVT VT = N->getValueType(0);
15102 SDLoc DL(N);
15103
15104 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
15105 bool IsNeg = MulAmt.isNegative();
15106 APInt MulAmtAbs = MulAmt.abs();
15107
15108 if ((MulAmtAbs - 1).isPowerOf2()) {
15109 // (mul x, 2^N + 1) => (add (shl x, N), x)
15110 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
15111
15112 if (!IsProfitable(IsNeg, true, VT))
15113 return SDValue();
15114
15115 SDValue Op0 = N->getOperand(0);
15116 SDValue Op1 =
15117 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
15118 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
15119 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
15120
15121 if (!IsNeg)
15122 return Res;
15123
15124 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
15125 } else if ((MulAmtAbs + 1).isPowerOf2()) {
15126 // (mul x, 2^N - 1) => (sub (shl x, N), x)
15127 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
15128
15129 if (!IsProfitable(IsNeg, false, VT))
15130 return SDValue();
15131
15132 SDValue Op0 = N->getOperand(0);
15133 SDValue Op1 =
15134 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
15135 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
15136
15137 if (!IsNeg)
15138 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
15139 else
15140 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
15141
15142 } else {
15143 return SDValue();
15144 }
15145}
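A host-side check of the strength-reduction identities applied above, computed in uint64_t (i.e. modulo 2^64, which is also what the DAG nodes compute):

#include <cstdint>
#include <cstdio>

//   x * (2^N + 1)   == (x << N) + x
//   x * (2^N - 1)   == (x << N) - x
//   x * -(2^N + 1)  == 0 - ((x << N) + x)
//   x * -(2^N - 1)  == x - (x << N)
int main() {
  for (uint64_t X : {0ull, 3ull, 0x123456789ull, ~0ull})
    for (unsigned N = 1; N < 32; ++N) {
      uint64_t P = 1ull << N;
      if (X * (P + 1) != (X << N) + X) return 1;
      if (X * (P - 1) != (X << N) - X) return 2;
      if (X * (0 - (P + 1)) != 0 - ((X << N) + X)) return 3;
      if (X * (0 - (P - 1)) != X - (X << N)) return 4;
    }
  printf("mul-by-constant decompositions hold\n");
}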
15146
15147bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
15148 // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
15149 if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())
15150 return false;
15151
15152 // If not a tail call then no need to proceed.
15153 if (!CI->isTailCall())
15154 return false;
15155
15156 // If tail calls are disabled for the caller then we are done.
15157 const Function *Caller = CI->getParent()->getParent();
15158 auto Attr = Caller->getFnAttribute("disable-tail-calls");
15159 if (Attr.getValueAsString() == "true")
15160 return false;
15161
15162 // If sibling calls have been disabled and tail-calls aren't guaranteed
15163 // there is no reason to duplicate.
15164 auto &TM = getTargetMachine();
15165 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
15166 return false;
15167
15168 // Can't tail call a function called indirectly, or if it has variadic args.
15169 const Function *Callee = CI->getCalledFunction();
15170 if (!Callee || Callee->isVarArg())
15171 return false;
15172
15173 // Make sure the callee and caller calling conventions are eligible for tco.
15174 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
15175 CI->getCallingConv()))
15176 return false;
15177
15178 // If the function is local, then we have a good chance at tail-calling it.
15179 return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
15180}
15181
15182bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
15183 if (!Subtarget.hasVSX())
15184 return false;
15185 if (Subtarget.hasP9Vector() && VT == MVT::f128)
15186 return true;
15187 return VT == MVT::f32 || VT == MVT::f64 ||
15188 VT == MVT::v4f32 || VT == MVT::v2f64;
15189}
15190
15191bool PPCTargetLowering::
15192isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
15193 const Value *Mask = AndI.getOperand(1);
15194 // If the mask is suitable for andi. or andis. we should sink the and.
15195 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
15196 // Can't handle constants wider than 64-bits.
15197 if (CI->getBitWidth() > 64)
15198 return false;
15199 int64_t ConstVal = CI->getZExtValue();
15200 return isUInt<16>(ConstVal) ||
15201 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
15202 }
15203
15204 // For non-constant masks, we can always use the record-form and.
15205 return true;
15206}
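The constant test above matches exactly the immediates encodable by the record-form instructions: andi. takes an unsigned 16-bit immediate, and andis. takes the same field shifted left by 16. A standalone mirror (hypothetical helper name):

#include <cstdint>
#include <cstdio>

static bool fitsAndiOrAndis(uint64_t C) {
  return C <= 0xFFFFu ||                               // andi. rD,rS,uimm16
         ((C & 0xFFFFu) == 0 && (C >> 16) <= 0xFFFFu); // andis. (shifted)
}

int main() {
  printf("%d %d %d\n",
         fitsAndiOrAndis(0x00FF),       // 1: fits andi.
         fitsAndiOrAndis(0x00FF0000),   // 1: fits andis.
         fitsAndiOrAndis(0x00FF00FF));  // 0: needs two instructions
}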
15207
15208// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
15209// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
15210// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
15211// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
15212// Transform (abs (sub a, b)) to (vabsd a b 1) if a and b are of type v4i32
15213SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
15214 assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
15215 assert(Subtarget.hasP9Altivec() &&
15216 "Only combine this when P9 altivec supported!");
15217 EVT VT = N->getValueType(0);
15218 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
15219 return SDValue();
15220
15221 SelectionDAG &DAG = DCI.DAG;
15222 SDLoc dl(N);
15223 if (N->getOperand(0).getOpcode() == ISD::SUB) {
15224 // Even for signed integers, the difference is known to be non-negative
15225 // (as a signed integer) when both inputs are zero-extended.
15226 unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
15227 unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
15228 if ((SubOpcd0 == ISD::ZERO_EXTEND ||
15229 SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
15230 (SubOpcd1 == ISD::ZERO_EXTEND ||
15231 SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
15232 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
15233 N->getOperand(0)->getOperand(0),
15234 N->getOperand(0)->getOperand(1),
15235 DAG.getTargetConstant(0, dl, MVT::i32));
15236 }
15237
15238 // For type v4i32, it can be optimized with xvnegsp + vabsduw
15239 if (N->getOperand(0).getValueType() == MVT::v4i32 &&
15240 N->getOperand(0).hasOneUse()) {
15241 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
15242 N->getOperand(0)->getOperand(0),
15243 N->getOperand(0)->getOperand(1),
15244 DAG.getTargetConstant(1, dl, MVT::i32));
15245 }
15246 }
15247
15248 return SDValue();
15249}
15250
15251// For type v4i32/v8i16/v16i8, transform
15252// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
15253// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
15254// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
15255// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
15256SDValue PPCTargetLowering::combineVSelect(SDNode *N,
15257 DAGCombinerInfo &DCI) const {
15258 assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
15259 assert(Subtarget.hasP9Altivec() &&
15260 "Only combine this when P9 altivec supported!");
15261
15262 SelectionDAG &DAG = DCI.DAG;
15263 SDLoc dl(N);
15264 SDValue Cond = N->getOperand(0);
15265 SDValue TrueOpnd = N->getOperand(1);
15266 SDValue FalseOpnd = N->getOperand(2);
15267 EVT VT = N->getOperand(1).getValueType();
15268
15269 if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
15270 FalseOpnd.getOpcode() != ISD::SUB)
15271 return SDValue();
15272
15273 // ABSD is only available for types v4i32/v8i16/v16i8.
15274 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
15275 return SDValue();
15276
15277 // Only combine when it saves at least one dependent computation.
15278 if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
15279 return SDValue();
15280
15281 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15282
15283 // Can only handle unsigned comparison here
15284 switch (CC) {
15285 default:
15286 return SDValue();
15287 case ISD::SETUGT:
15288 case ISD::SETUGE:
15289 break;
15290 case ISD::SETULT:
15291 case ISD::SETULE:
15292 std::swap(TrueOpnd, FalseOpnd);
15293 break;
15294 }
15295
15296 SDValue CmpOpnd1 = Cond.getOperand(0);
15297 SDValue CmpOpnd2 = Cond.getOperand(1);
15298
15299 // SETCC CmpOpnd1 CmpOpnd2 cond
15300 // TrueOpnd = CmpOpnd1 - CmpOpnd2
15301 // FalseOpnd = CmpOpnd2 - CmpOpnd1
15302 if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
15303 TrueOpnd.getOperand(1) == CmpOpnd2 &&
15304 FalseOpnd.getOperand(0) == CmpOpnd2 &&
15305 FalseOpnd.getOperand(1) == CmpOpnd1) {
15306 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
15307 CmpOpnd1, CmpOpnd2,
15308 DAG.getTargetConstant(0, dl, MVT::i32));
15309 }
15310
15311 return SDValue();
15312}
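A scalar picture of the pattern matched above: with unsigned operands, the select of the two subtractions is exactly the absolute difference, which vabsduw computes per 32-bit lane. A small host-side check:

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  for (uint32_t A : {0u, 5u, 0xFFFFFFFFu})
    for (uint32_t B : {0u, 7u, 0x80000000u}) {
      uint32_t Sel = (A > B) ? A - B : B - A;          // vselect(setugt) form
      uint32_t Abs = std::max(A, B) - std::min(A, B);  // absolute difference
      if (Sel != Abs) return 1;
    }
  printf("select-of-subs equals unsigned absolute difference\n");
}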
unsigned const MachineRegisterInfo * MRI
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static const unsigned PerfectShuffleTable[6561+1]
unsigned Intr
amdgpu Simplify well known AMD library false FunctionCallee Callee
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
static bool isLoad(int Opcode)
@ OP_COPY
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
Function Alias Analysis Results
Atomic ordering constants.
static GCRegistry::Add< ShadowStackGC > C("shadow-stack", "Very portable GC for uncooperative code generators")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:21
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:250
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:122
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
unsigned first
unsigned second
IRTranslator LLVM IR MI
lazy value info
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
static bool isConstantOrUndef(const SDValue Op)
Module.h This file contains the declarations for the Module class.
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static const MCPhysReg QFPR[]
QFPR - The set of QPX registers that should be allocated for arguments.
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static bool callsShareTOCBase(const Function *Caller, SDValue Callee, const TargetMachine &TM)
static bool hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS)
static bool isFunctionGlobalAddress(SDValue Callee)
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall, bool isPatchPoint, bool hasNest, SmallVectorImpl< std::pair< unsigned, SDValue > > &RegsToPass, SmallVectorImpl< SDValue > &Ops, std::vector< EVT > &NodeTys, ImmutableCallSite CS, const PPCSubtarget &Subtarget)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs, bool HasQPX)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments, on Darwin.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base, int64_t &Offset, SelectionDAG &DAG)
static void setUsesTOCBasePtr(MachineFunction &MF)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static Instruction * callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id)
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign, unsigned MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit, SDValue GA)
cl::opt< bool > ANDIGlueBug
static cl::opt< bool > EnableQuadPrecision("enable-ppc-quad-precision", cl::desc("enable quad precision float support on ppc"), cl::Hidden)
static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildSplatI - Build a canonical splati of Val with an element size of SplatSize.
static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
uint32_t Size
Definition: Profile.cpp:46
@ VI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI Whole Quad Mode
static bool isSplat(ArrayRef< Value * > VL)
Shadow Stack GC Lowering
static bool Enabled
Definition: Statistic.cpp:50
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:168
This file describes how to lower LLVM code to machine code.
This defines the Use class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:364
static bool is64Bit(const char *name)
unsigned getPrefLoopAlignment() const
bool isPosZero() const
Definition: APFloat.h:1168
Class for arbitrary precision integers.
Definition: APInt.h:69
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1461
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:860
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1562
APInt abs() const
Get the absolute value;.
Definition: APInt.h:1799
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:561
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:363
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:477
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:463
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:647
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:635
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
bool hasFnAttribute(Attribute::AttrKind Kind) const
Equivalent to hasAttribute(AttributeList::FunctionIndex, Kind) but may be faster.
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
const BlockAddress * getBlockAddress() const
The address of a basic block.
Definition: Constants.h:839
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
unsigned getLocMemOffset() const
Register getLocReg() const
LocInfo getLocInfo() const
bool isMemLoc() const
unsigned getValNo() const
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1287
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1344
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
IterTy arg_begin() const
Definition: CallSite.h:584
IterTy arg_end() const
Definition: CallSite.h:588
ValTy * getCalledValue() const
Return the pointer to function that is being called.
Definition: CallSite.h:104
unsigned arg_size() const
Definition: CallSite.h:226
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Definition: CallSite.h:279
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:263
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:232
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:746
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
Definition: DataLayout.cpp:788
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:766
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:176
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
TargetLoweringBase::ArgListTy ArgListTy
Definition: FastISel.h:69
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool CanLowerReturn
CanLowerReturn - true iff the function's return value can be lowered to registers.
arg_iterator arg_begin()
Definition: Function.h:695
size_t arg_size() const
Definition: Function.h:722
const GlobalValue * getGlobal() const
VisibilityTypes getVisibility() const
Definition: GlobalValue.h:236
LinkageTypes getLinkage() const
Definition: GlobalValue.h:460
StringRef getSection() const
Definition: Globals.cpp:161
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:546
bool hasComdat() const
Definition: GlobalValue.h:229
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:126
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Definition: IRBuilder.h:406
Type * getVoidTy()
Fetch the type representing void.
Definition: IRBuilder.h:416
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:779
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2193
Establish a view to a call site for examination.
Definition: CallSite.h:897
const BasicBlock * getParent() const
Definition: Instruction.h:66
bool hasAtomicLoad() const
Return true if this atomic instruction loads from memory.
NodeT & get() const
get - Dereference as a NodeT reference.
Definition: IntervalMap.h:526
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
Base class for LoadSDNode and StoreSDNode.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition: LoopInfo.h:133
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition: LoopInfo.h:94
block_iterator block_end() const
Definition: LoopInfo.h:157
block_iterator block_begin() const
Definition: LoopInfo.h:156
Context object for machine code objects.
Definition: MCContext.h:64
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
static mvt_range integer_valuetypes()
unsigned getScalarSizeInBits() const
static mvt_range vector_valuetypes()
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
unsigned getSizeInBits() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static mvt_range fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
unsigned getObjectAlignment(int ObjectIdx) const
Return the alignment of the specified stack object.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
const Function & getFunction() const
Return the LLVM function that this machine code represents.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
MachineModuleInfo & getMMI() const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
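These builder methods are normally chained off BuildMI; a minimal sketch, assuming the caller supplies the instruction descriptions and registers (everything here is illustrative):
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;
// Hypothetical emitter: DstReg = <op> SrcReg, 1, then branch to TargetMBB.
static void emitExampleOp(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator InsertPt,
                          const DebugLoc &DL, const MCInstrDesc &Desc,
                          const MCInstrDesc &BranchDesc, unsigned DstReg,
                          unsigned SrcReg, MachineBasicBlock *TargetMBB) {
  BuildMI(MBB, InsertPt, DL, Desc, DstReg)
      .addReg(SrcReg) // register use
      .addImm(1);     // immediate operand
  BuildMI(MBB, InsertPt, DL, BranchDesc)
      .addMBB(TargetMBB); // basic-block operand
}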
Representation of each machine instruction.
Definition: MachineInstr.h:66
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
uint64_t getAlignment() const
Return the minimum known alignment in bytes of the actual memory reference.
uint64_t getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
const MCContext & getContext() const
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
static MachineOperand CreateImm(int64_t Val)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
unsigned getLiveInVirtReg(unsigned PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in physical ...
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
unsigned getAlignment() const
EVT getMemoryVT() const
Return the type of the in-memory value.
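A hedged sketch of how these MemSDNode accessors combine when inspecting a node during a combine; the predicate itself is hypothetical:
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;
// Returns true if N is a memory node that loads a value whose known
// alignment covers its full in-memory size.
static bool isNaturallyAlignedLoadLike(SDNode *N) {
  auto *MemN = dyn_cast<MemSDNode>(N);
  if (!MemN)
    return false;
  EVT MemVT = MemN->getMemoryVT(); // type of the in-memory value
  (void)MemN->getChain();          // token chain operand
  (void)MemN->getBasePtr();        // address operand
  return MemN->getAlignment() >= MemVT.getStoreSize() &&
         (MemN->getMemOperand()->getFlags() & MachineMemOperand::MOLoad);
}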
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
unsigned getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
unsigned getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
unsigned getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register -- 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
void addLiveInAttr(unsigned VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
unsigned getVarArgsNumFPR() const
void setVarArgsNumGPR(unsigned Num)
void setMinReservedArea(unsigned size)
unsigned getVarArgsNumGPR() const
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void setVarArgsFrameIndex(int Index)
void setFramePointerSaveIndex(int Idx)
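These setters live on the per-function info object reached through MachineFunction::getInfo; a minimal sketch of the pattern, with hypothetical argument names:
#include "PPCMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
using namespace llvm;
// Record vararg save-area state computed during argument lowering.
static void recordVarArgState(MachineFunction &MF, unsigned GPRsUsed,
                              unsigned FPRsUsed, int FrameIdx) {
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setVarArgsFrameIndex(FrameIdx);
  FuncInfo->setVarArgsNumGPR(GPRsUsed);
  FuncInfo->setVarArgsNumFPR(FPRsUsed);
}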
bool useLongCalls() const
Definition: PPCSubtarget.h:296
bool hasFRSQRTE() const
Definition: PPCSubtarget.h:237
bool hasQPX() const
Definition: PPCSubtarget.h:247
unsigned char classifyGlobalReference(const GlobalValue *GV) const
classifyGlobalReference - Classify a global variable reference for the current subtarget according t...
bool hasFPCVT() const
Definition: PPCSubtarget.h:243
bool isAIXABI() const
Definition: PPCSubtarget.h:315
bool useSoftFloat() const
Definition: PPCSubtarget.h:213
bool use64BitRegs() const
use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit registers in 32-bit mode when...
Definition: PPCSubtarget.h:218
bool hasAltivec() const
Definition: PPCSubtarget.h:244
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:181
bool hasLazyResolverStub(const GlobalValue *GV) const
hasLazyResolverStub - Return true if accesses to the specified global have to go through a dyld lazy ...
bool needsSwapsForVSXMemOps() const
Definition: PPCSubtarget.h:297
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool needsTwoConstNR() const
Definition: PPCSubtarget.h:249
bool hasFSQRT() const
Definition: PPCSubtarget.h:234
bool hasP9Vector() const
Definition: PPCSubtarget.h:253
bool hasFRE() const
Definition: PPCSubtarget.h:235
bool hasFRSQRTES() const
Definition: PPCSubtarget.h:238
unsigned getDarwinDirective() const
getDarwinDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:173
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:184
bool useCRBits() const
useCRBits - Return true if we should store and manipulate i1 values in the individual condition regis...
Definition: PPCSubtarget.h:222
bool hasRecipPrec() const
Definition: PPCSubtarget.h:239
bool hasSTFIWX() const
Definition: PPCSubtarget.h:240
bool isSVR4ABI() const
Definition: PPCSubtarget.h:316
bool hasInvariantFunctionDescriptors() const
Definition: PPCSubtarget.h:271
bool isDarwinABI() const
Definition: PPCSubtarget.h:314
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:301
bool hasPartwordAtomics() const
Definition: PPCSubtarget.h:276
bool hasSPE() const
Definition: PPCSubtarget.h:245
bool hasLFIWAX() const
Definition: PPCSubtarget.h:241
bool isLittleEndian() const
Definition: PPCSubtarget.h:230
bool hasFCPSGN() const
Definition: PPCSubtarget.h:233
bool isTargetLinux() const
Definition: PPCSubtarget.h:312
bool hasP9Altivec() const
Definition: PPCSubtarget.h:254
bool isDarwin() const
isDarwin - True if this is any Darwin platform.
Definition: PPCSubtarget.h:306
bool isTargetELF() const
Definition: PPCSubtarget.h:310
bool has64BitSupport() const
has64BitSupport - Return true if the selected CPU supports 64-bit instructions, regardless of whether...
Definition: PPCSubtarget.h:211
bool hasFPRND() const
Definition: PPCSubtarget.h:242
bool isELFv2ABI() const
bool hasP8Vector() const
Definition: PPCSubtarget.h:250
bool enableMachineScheduler() const override
Scheduling customization.
bool hasFRES() const
Definition: PPCSubtarget.h:236
bool hasLDBRX() const
Definition: PPCSubtarget.h:260
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:191
bool isISA3_0() const
Definition: PPCSubtarget.h:295
bool hasVSX() const
Definition: PPCSubtarget.h:248
bool hasDirectMove() const
Definition: PPCSubtarget.h:277
bool hasP8Altivec() const
Definition: PPCSubtarget.h:251
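A sketch of how these subtarget predicates typically gate lowering decisions; the particular combination is illustrative, not a rule copied from this file:
#include "PPCSubtarget.h"
using namespace llvm;
// True when little-endian VSX memory accesses need a swap fixup.
static bool wantsSwappedVSXAccess(const PPCSubtarget &Subtarget) {
  return Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
         Subtarget.needsSwapsForVSXMemOps();
}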
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
unsigned getRegisterByName(const char *RegName, EVT VT, SelectionDAG &DAG) const override
Return the register ID of the name passed in.
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
void insertCopiesSplitCSR(MachineBasicBlock *Entry, const SmallVectorImpl< MachineBasicBlock * > &Exits) const override
Insert explicit copies in entry and exit blocks.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
void initializeSplitCSR(MachineBasicBlock *Entry) const override
Perform necessary initialization to handle a subset of CSRs explicitly via copies.
unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified address, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if the target always benefits from combining into FMA for a given value type.
Instruction * emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
unsigned getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
unsigned getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, const AttributeList &FuncAttributes) const override
getOptimalMemOpType - Returns the target specific optimal type for load and store operations as a res...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align=1, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool useLoadStackGuardNode() const override
Override to support customized stack guard loading.
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, unsigned EncodingAlignment=0) const
SelectAddressRegReg - Given the specified address, check to see if it can be more efficiently repre...
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, unsigned EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
bool isJumpTableRelative() const override
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
Instruction * emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
const PseudoSourceValue * getGOT()
Return a pseudo source value referencing the global offset table (or something the like).
const PseudoSourceValue * getStack()
Return a pseudo source value referencing the area below the stack frame of a function,...
Wrapper class representing virtual and physical registers.
Definition: Register.h:18
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
op_iterator op_end() const
const SDNodeFlags getFlags() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
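A minimal sketch of use-list traversal with these SDNode accessors; the predicate is hypothetical:
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;
// Returns true if every user of N is a plain store node.
static bool allUsersAreStores(const SDNode *N) {
  for (SDNode::use_iterator UI = N->use_begin(), E = SDNode::use_end();
       UI != E; ++UI)
    if (UI->getOpcode() != ISD::STORE)
      return false;
  return true;
}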
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:221
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:415
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:689
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue CreateStackTemporary(EVT VT, unsigned minAlign=1)
Create a stack temporary, suitable for holding the specified value type.
SDValue getTargetConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:655
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:416
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:878
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
Definition: SelectionDAG.h:862
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:750
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:413
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:644
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:649
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node that starts a new call frame, in which InSize bytes are set up inside ...
Definition: SelectionDAG.h:850
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:414
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:695
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:988
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned char TargetFlags=0)
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align=0, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, unsigned Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:592
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:410
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:721
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offs=0, bool isT=false, unsigned char TargetFlags=0)
LLVMContext * getContext() const
Definition: SelectionDAG.h:420
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:638
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
Definition: SelectionDAG.h:963
SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:473
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
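A minimal sketch of composing nodes with these SelectionDAG builders inside a lowering hook; the opcode choice and the function itself are illustrative, not a real PPC lowering:
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
// Lower Op to: (x + 1) == 1 ? x : x + 1 (purely illustrative).
static SDValue lowerExampleOp(SDValue Op, SelectionDAG &DAG) {
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  SDValue X = Op.getOperand(0);
  SDValue One = DAG.getConstant(1, dl, VT);
  SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, X, One);
  // The SelectCC helper takes an ISD::CondCode directly.
  return DAG.getSelectCC(dl, Sum, One, X, Sum, ISD::SETEQ);
}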
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:417
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
const_iterator begin() const
Definition: SmallSet.h:223
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
void clear()
Definition: SmallSet.h:218
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
const_iterator end() const
Definition: SmallSet.h:229
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
size_t size() const
Definition: SmallVector.h:52
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:315
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:467
void push_back(const T &Elt)
Definition: SmallVector.h:211
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:837
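A sketch of the small-container idiom these classes provide; the helper and the choice of MVT::i64 are illustrative:
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
// Collect each register operand once, in encounter order.
static void collectUniqueRegs(SelectionDAG &DAG, ArrayRef<unsigned> Regs,
                              SmallVectorImpl<SDValue> &Ops) {
  SmallSet<unsigned, 4> Seen;    // no heap traffic for <= 4 entries
  for (unsigned Reg : Regs)
    if (Seen.insert(Reg).second) // .second is true on first insertion
      Ops.push_back(DAG.getRegister(Reg, MVT::i64));
}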
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
LLVM_NODISCARD size_t size() const
size - Get the string size.
Definition: StringRef.h:130
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:42
LLVM_NODISCARD R Default(T Value)
Definition: StringSwitch.h:181
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:67
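StringSwitch is the usual way constraint and register-name strings get classified; a hypothetical mapping:
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
using namespace llvm;
// Map a (hypothetical) register name to a small numeric id; 0 = unknown.
static unsigned classifyExampleRegName(StringRef Name) {
  return StringSwitch<unsigned>(Name)
      .Case("sp", 1)
      .Case("fp", 2)
      .Default(0);
}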
Class to represent struct types.
Definition: DerivedTypes.h:233
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
virtual bool isJumpTableRelative() const
unsigned MaxStoresPerMemcpyOptSize
Maximum number of store operations that may be substituted for a call to memcpy, used for functions w...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
void setUseUnderscoreSetJmp(bool Val)
Indicate whether this target prefers to use _setjmp to implement llvm.setjmp or the version without _...
void setPrefLoopAlignment(unsigned Align)
Set the target's preferred loop alignment.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setUseUnderscoreLongJmp(bool Val)
Indicate whether this target prefers to use _longjmp to implement llvm.longjmp or the version without...
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
void setCondCodeAction(ISD::CondCode CC, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setMinFunctionAlignment(unsigned Align)
Set the target's minimum function alignment (in log2(bytes))
unsigned MaxStoresPerMemsetOptSize
Maximum number of stores operations that may be substituted for the call to memset,...
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum bytes of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void setPrefFunctionAlignment(unsigned Align)
Set the target's preferred function alignment.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
unsigned MaxStoresPerMemmoveOptSize
Maximum number of store instructions that may be substituted for a call to memmove,...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
void setStackPointerRegisterToSaveRestore(unsigned R)
If set to a physical register, this specifies the register that llvm.stacksave/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
void setMinStackArgumentAlignment(unsigned Align)
Set the minimum stack alignment of an argument (in log2(bytes)).
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
unsigned MaxStoresPerMemcpy
Specify maximum bytes of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
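These setters are called from a target's TargetLowering constructor; a sketch of the style, with operations, types, and actions chosen for illustration rather than copied from PPC's actual configuration:
// Inside a hypothetical TargetLowering subclass constructor body:
addRegisterClass(MVT::i32, &PPC::GPRCRegClass);     // i32 lives in GPRs
setOperationAction(ISD::SDIVREM, MVT::i32, Expand); // no combined divrem
setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i1, Promote);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);    // no direct f64->f32 store
setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
setSchedulingPreference(Sched::Hybrid);
computeRegisterProperties(Subtarget.getRegisterInfo());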
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:65
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetOptions Options
bool shouldAssumeDSOLocal(const Module &M, const GlobalValue *GV) const
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:229
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:146
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:149
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:196
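A sketch of type dispatch in the style these predicates support, e.g. when computing a by-value alignment; the policy and numbers are illustrative:
#include "llvm/IR/Type.h"
using namespace llvm;
// Hypothetical alignment policy keyed off the IR type.
static unsigned exampleByValAlignment(Type *Ty) {
  if (Ty->isVectorTy())
    return 16; // give vectors natural 16-byte alignment
  if (Ty->isDoubleTy() ||
      (Ty->isIntegerTy() && Ty->getPrimitiveSizeInBits() == 64))
    return 8;
  return 4;
}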
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:72
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
self_iterator getIterator()
Definition: ilist_node.h:81
arg_iterator - Iterates through arguments stored inside an ArgList.
Definition: ArgList.h:40
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char IsConst[]
Key for Kernel::Arg::Metadata::mIsConst.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
@ Fast
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
@ C
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:38
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:467
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:197
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:730
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:726
@ TargetConstantPool
Definition: ISDOpcodes.h:134
@ FLT_ROUNDS_
FLT_ROUNDS_ - Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest 2 Round to ...
Definition: ISDOpcodes.h:570
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:113
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:340
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:205
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:437
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:759
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:820
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:222
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:642
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:495
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:326
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:165
@ RETURNADDR
Definition: ISDOpcodes.h:72
@ GlobalAddress
Definition: ISDOpcodes.h:61
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:502
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:369
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:287
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:417
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:209
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:595
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:190
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:789
@ GlobalTLSAddress
Definition: ISDOpcodes.h:61
@ FrameIndex
Definition: ISDOpcodes.h:61
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:489
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:399
@ TargetExternalSymbol
Definition: ISDOpcodes.h:135
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:657
@ TargetJumpTable
Definition: ISDOpcodes.h:133
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:807
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:636
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:610
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:678
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:666
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:444
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:816
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:183
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:755
@ FP_ROUND_INREG
X = FP_ROUND_INREG(Y, VT) - This operator takes an FP register, and rounds it to a floating point val...
Definition: ISDOpcodes.h:577
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:130
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:404
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:434
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:391
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (a vector value) starting with the ...
Definition: ISDOpcodes.h:382
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:363
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:492
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:459
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:827
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:622
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:651
@ ConstantPool
Definition: ISDOpcodes.h:62
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:510
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum or signed or unsigned integers.
Definition: ISDOpcodes.h:408
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:580
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:453
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:95
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:72
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:548
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:778
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:411
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:798
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:150
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:231
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:356
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:49
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:562
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:543
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:107
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:498
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:750
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:672
@ BlockAddress
Definition: ISDOpcodes.h:62
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:480
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:56
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:336
@ AssertZext
Definition: ISDOpcodes.h:56
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:892
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:795
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:158
@ TargetGlobalTLSAddress
Definition: ISDOpcodes.h:131
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable,...
Definition: ISDOpcodes.h:351
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1028
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:950
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:995
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:970
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1034
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1043
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:117
@ MO_NLP_FLAG
MO_NLP_FLAG - If this bit is set, the symbol reference is actually to the non_lazy_ptr for the global...
Definition: PPC.h:99
@ MO_NLP_HIDDEN_FLAG
MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a symbol with hidden visibility.
Definition: PPC.h:104
@ MO_TPREL_HA
Definition: PPC.h:114
@ MO_PLT
On a symbol operand "FOO", this indicates that the reference is actually to "FOO@plt".
Definition: PPC.h:91
@ MO_TLS
Definition: PPC.h:123
@ MO_TPREL_LO
Definition: PPC.h:113
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition: PPC.h:110
@ MO_HA
Definition: PPC.h:111
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition: PPC.h:95
@ QBFLT
QBFLT = Access the underlying QPX floating-point boolean representation.
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ QVGPCI
QVGPCI = This corresponds to the QPX qvgpci instruction.
@ VABSD
An SDNode for Power9 vector absolute value difference.
@ ANDIo_1_EQ_BIT
i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after execu...
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ QVFPERM
QVFPERM = This corresponds to the QPX qvfperm instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ RFEBB
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
@ VCMPo
RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the altivec VCMP*o instructions.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ FP_TO_UINT_IN_VSR
Floating-point-to-integer conversion instructions.
@ FP_EXTEND_LH
Custom extend v4f32 to v2f64.
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ CLRBHRB
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS model, produces an ADD instruction that ...
@ QVLFSb
QBRC, CHAIN = QVLFSb CHAIN, Ptr The 4xf32 load used for v4i1 constants.
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ XXREVERSE
XXREVERSE - The PPC VSX reverse instruction.
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing an f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ SExtVElems
SExtVElems, takes an input vector of a smaller type and sign extends to an output vector of a larger ...
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ GlobalBaseReg
GlobalBaseReg - On Darwin, this node represents the result of the mflr at function entry,...
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ BUILD_FP128
Direct move of two consecutive GPRs to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ VPERM
VPERM - The PPC VPERM Instruction.
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ QVALIGNI
QVALIGNI = This corresponds to the QPX qvaligni instruction.
@ RET_FLAG
Return with a flag operand, matched by 'blr'.
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
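A minimal standalone sketch of the arithmetic this node models (illustrative C++, not backend code; sdivByPow2 is a hypothetical name, assuming 0 < K < 32):

#include <cstdint>
// srawi shifts right arithmetically and sets CA when the source is negative
// and nonzero bits were shifted out; addze then adds CA, rounding the
// quotient toward zero as C-style signed division requires.
int32_t sdivByPow2(int32_t X, unsigned K) {
  int32_t Shifted = X >> K;                                           // srawi
  bool Remainder = (static_cast<uint32_t>(X) & ((1u << K) - 1)) != 0;
  return Shifted + ((X < 0 && Remainder) ? 1 : 0);                    // addze CA
}

For example, sdivByPow2(-7, 1) yields -3 rather than the -4 a plain arithmetic shift would produce.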
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which sign-extends a word and shifts left by an immediate.
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ QVESPLATI
QVESPLATI = This corresponds to the QPX qvesplati instruction.
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
@ MFBHRBE
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry.
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ STBRX
CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a byte-swapping store instruction.
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
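A hedged sketch of how these halves recombine (illustrative helpers, not backend code):

#include <cstdint>
// With a lis/ori pair the two halves are independent:
uint16_t hiPart(uint32_t Addr) { return Addr >> 16; }
uint16_t loPart(uint32_t Addr) { return Addr & 0xFFFF; }
// With a lis/addi pair the low half is sign-extended, so the high half
// takes the classic @ha adjustment to compensate for the borrow:
uint16_t haPart(uint32_t Addr) { return (Addr + 0x8000) >> 16; }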
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
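Hypothetical pack/unpack helpers for the documented "(BI << 5) | BO" encoding (BO is the 5-bit branch-option field, BI the condition-register bit index):

constexpr unsigned makePredicate(unsigned BI, unsigned BO) { return (BI << 5) | BO; }
constexpr unsigned predicateBO(unsigned Pred) { return Pred & 0x1F; }  // low 5 bits
constexpr unsigned predicateBI(unsigned Pred) { return Pred >> 5; }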
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
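A standalone illustration (not the LLVM routine itself) of the byte-reverse-within-doubleword pattern XXBRD implements, over a plain 16-entry v16i8 mask with no undef entries:

bool isByteReverseDMask(const int Mask[16]) {
  for (int DWord = 0; DWord < 2; ++DWord)    // two 8-byte doublewords
    for (int Byte = 0; Byte < 8; ++Byte)     // bytes reversed within each
      if (Mask[DWord * 8 + Byte] != DWord * 8 + (7 - Byte))
        return false;
  return true;
}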
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the specified isSplatShuffleMask...
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
int isQVALIGNIShuffleMask(SDNode *N)
If this is a qvaligni shuffle mask, return the shift amount, otherwise return -1.
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
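A simplified standalone sketch of the splat test (hypothetical isSplatMask; the real routine also tolerates undef entries, which this version does not):

#include <cstddef>
bool isSplatMask(const int *Mask, size_t NumBytes, unsigned EltSize) {
  if (EltSize == 0 || NumBytes % EltSize != 0)
    return false;
  int First = Mask[0];
  if (First % static_cast<int>(EltSize) != 0)
    return false;                                // must start element-aligned
  for (size_t I = 0; I < NumBytes; ++I)          // every EltSize-byte group
    if (Mask[I] != First + static_cast<int>(I % EltSize))
      return false;                              // must name the same element
  return true;
}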
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
@ GeneralDynamic
Definition: CodeGen.h:43
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:345
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
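The underlying range check reduces to a round-trip test; a minimal sketch (hypothetical fitsSImm16):

#include <cstdint>
// A value fits a signed 16-bit immediate field iff truncating to int16_t
// and sign-extending back reproduces it.
bool fitsSImm16(int64_t V) { return V == static_cast<int16_t>(V); }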
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isAcquireOrStronger(AtomicOrdering ao)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition: MathExtras.h:433
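The standard bit trick behind such a check, as a sketch:

#include <cstdint>
// A power of two has exactly one set bit, so clearing the lowest set bit
// (V & (V - 1)) must leave zero; the V != 0 guard excludes zero itself.
constexpr bool isPow2(uint64_t V) { return V != 0 && (V & (V - 1)) == 0; }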
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:305
uint32_t FloatToBits(float Float)
This function takes a float and returns the bit equivalent 32-bit integer.
Definition: MathExtras.h:605
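A portable equivalent using memcpy (a sketch, not the library implementation; memcpy-based type punning is well-defined, unlike a union or pointer cast):

#include <cstdint>
#include <cstring>
uint32_t floatToBits(float F) {
  static_assert(sizeof(uint32_t) == sizeof(float), "size mismatch");
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));  // copy the object representation
  return Bits;
}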
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1199
const NoneType None
Definition: None.h:23
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1258
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:1050
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count the number of zero bits from the least significant bit toward the most significant, stopping at the first 1.
Definition: MathExtras.h:119
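A portable loop equivalent for a nonzero value (a sketch; real implementations use a hardware instruction or lookup table). For a power of two D this recovers the shift amount K with D == 1ULL << K:

#include <cstdint>
unsigned ctz64(uint64_t V) {        // precondition: V != 0
  unsigned K = 0;
  while ((V & 1) == 0) { V >>= 1; ++K; }
  return K;
}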
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:450
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
@ Mod
The access may modify the value stored in memory.
@ Z
zlib style compression
FormattedString left_justify(StringRef Str, unsigned Width)
left_justify - append spaces after string so total output is Width characters.
Definition: Format.h:144
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:614
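For power-of-two alignments the result is the lowest set bit of A | B; a sketch of that trick (hypothetical minAlign, mirroring the documented behavior):

#include <cstdint>
constexpr uint64_t minAlign(uint64_t A, uint64_t B) {
  return (A | B) & (~(A | B) + 1);   // x & -x isolates the lowest set bit
}

For example, minAlign(16, 8) == 8 and minAlign(32, 32) == 32.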
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:737
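Both this helper and the 64-bit variant below rest on the shift-up/shift-down idiom; a sketch with the bit count as a runtime argument (assumes 1 <= B <= 32 and an arithmetic right shift on signed types, as on mainstream compilers):

#include <cstdint>
int32_t signExtend32(uint32_t X, unsigned B) {
  return static_cast<int32_t>(X << (32 - B)) >> (32 - B);
}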
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool isReleaseOrStronger(AtomicOrdering ao)
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:753
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
#define N
static const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition: APFloat.cpp:170
Extended Value Type.
Definition: ValueTypes.h:33
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:95
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:125
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:72
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:291
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:228
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:135
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:303
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:252
std::string getEVTString() const
This function returns value type as a string, e.g. "i32".
Definition: ValueTypes.cpp:114
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:150
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:259
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:217
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:264
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:130
unsigned getScalarSizeInBits() const
Definition: ValueTypes.h:297
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:145
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:272
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:140
bool isInConsecutiveRegs() const
unsigned getByValSize() const
bool isInConsecutiveRegsLast() const
unsigned getByValAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:65
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
void setNoNaNs(bool b)
void setNoInfs(bool b)
This represents a list of ValueTypes that has been interned by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg. If BaseGV is null...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)